[llvm-branch-commits] [llvm-branch] r163727 [1/2] - in /llvm/branches/AMDILBackend: include/llvm/CodeGen/ lib/Target/AMDIL/ lib/Target/AMDIL/TargetInfo/
Victor Oliveira
Victor.Oliveira at amd.com
Wed Sep 12 10:43:35 PDT 2012
Author: victorm
Date: Wed Sep 12 12:43:34 2012
New Revision: 163727
URL: http://llvm.org/viewvc/llvm-project?rev=163727&view=rev
Log:
Merging with our internal tree; includes formatting cleanups and bug fixes.
Modified:
llvm/branches/AMDILBackend/include/llvm/CodeGen/SelectionDAG.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem32.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMem64.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILModuleInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMultiClass.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNIDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILNodes.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILOperands.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPatterns.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManager.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPointerManagerImpl.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILPrintfConvert.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILProfiles.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsScalar.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV2.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterDefsV4.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterInfo.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalar.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarW.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarX.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarY.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesScalarZ.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2XY.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV2ZW.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILRegisterUsesV4.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIAsmPrinter.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIIOExpansion.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSIPointerManager.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSubtarget.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILSwizzleEncoder.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTNDevice.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILTargetMachine.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILUtilityFunctions.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILUtilityFunctions.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILVersion.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/Processors.td
llvm/branches/AMDILBackend/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodata.cpp
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodata.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodb.h
llvm/branches/AMDILBackend/lib/Target/AMDIL/macrodb_gen.h
Modified: llvm/branches/AMDILBackend/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/include/llvm/CodeGen/SelectionDAG.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/branches/AMDILBackend/include/llvm/CodeGen/SelectionDAG.h Wed Sep 12 12:43:34 2012
@@ -614,6 +614,18 @@
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
}
+ // getSelect - Helper function to make it easier to build Select's if you just
+ // have operands and don't want to check for vector.
+ SDValue getSelect(DebugLoc DL, EVT VT, SDValue Cond,
+ SDValue LHS, SDValue RHS) {
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Cannot use select on differing types");
+ assert(VT.isVector() == LHS.getValueType().isVector() &&
+ "Cannot mix vectors and scalars");
+ return getNode(Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT,
+ DL, VT, Cond, LHS, RHS);
+ }
+
/// getSelectCC - Helper function to make it easier to build SelectCC's if you
/// just have an ISD::CondCode instead of an SDValue.
///
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL.h Wed Sep 12 12:43:34 2012
@@ -22,6 +22,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Target/TargetMachine.h"
+#define AMD_LLVM_PUBLIC
+
#define AMDIL_MAJOR_VERSION 3
#define AMDIL_MINOR_VERSION 1
#define AMDIL_REVISION_NUMBER 104
@@ -45,59 +47,34 @@
#define DEFAULT_SCRATCH_ID 1
#define DEFAULT_VEC_SLOTS 8
-// SC->CAL version matchings.
-#define CAL_CACHED_ALIGNED_UAVS 1679
-#define CAL_VERSION_SC_156 1650
-#define CAL_VERSION_SC_155 1630
-#define CAL_VERSION_SC_154 1624
-#define CAL_VERSION_SC_153 1616
-#define CAL_VERSION_SC_152 1603
-#define CAL_VERSION_SC_151 1589
-#define CAL_VERSION_SC_150 1561
-#define CAL_VERSION_SC_149 CAL_VERSION_SC_150
-#define CAL_VERSION_SC_148 1525
-#define CAL_VERSION_SC_147 CAL_VERSION_SC_148
-#define CAL_VERSION_SC_146 CAL_VERSION_SC_148
-#define CAL_VERSION_SC_145 1451
-#define CAL_VERSION_SC_144 CAL_VERSION_SC_145
-#define CAL_VERSION_SC_143 1441
-#define CAL_VERSION_SC_142 CAL_VERSION_SC_142
-#define CAL_VERSION_SC_141 1420
-#define CAL_VERSION_SC_140 1400
-#define CAL_VERSION_SC_139 1387
-#define CAL_VERSION_SC_138 CAL_VERSION_SC_139
-#define CAL_APPEND_BUFFER_SUPPORT 1340
-#define CAL_VERSION_SC_137 1331
-#define CAL_VERSION_SC_136 982
-#define CAL_VERSION_SC_135 950
-#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
-
-#define OCL_DEVICE_RV710 0x00001
-#define OCL_DEVICE_RV730 0x00002
-#define OCL_DEVICE_RV770 0x00004
-#define OCL_DEVICE_CEDAR 0x00008
-#define OCL_DEVICE_REDWOOD 0x00010
-#define OCL_DEVICE_JUNIPER 0x00020
-#define OCL_DEVICE_CYPRESS 0x00040
-#define OCL_DEVICE_CAICOS 0x00080
-#define OCL_DEVICE_TURKS 0x00100
-#define OCL_DEVICE_BARTS 0x00200
-#define OCL_DEVICE_CAYMAN 0x00400
-#define OCL_DEVICE_TAHITI 0x00800
-#define OCL_DEVICE_PITCAIRN 0x01000
-#define OCL_DEVICE_CAPEVERDE 0x02000
-#define OCL_DEVICE_TRINITY 0x04000
-#define OCL_DEVICE_DOGS 0x08000
-#define OCL_DEVICE_CATS 0x10000
-#define OCL_DEVICE_BUNNIES 0x20000
-#define OCL_DEVICE_ALL 0xFFFFF
+#define OCL_DEVICE_RV710 0x000001
+#define OCL_DEVICE_RV730 0x000002
+#define OCL_DEVICE_RV770 0x000004
+#define OCL_DEVICE_CEDAR 0x000008
+#define OCL_DEVICE_REDWOOD 0x000010
+#define OCL_DEVICE_JUNIPER 0x000020
+#define OCL_DEVICE_CYPRESS 0x000040
+#define OCL_DEVICE_CAICOS 0x000080
+#define OCL_DEVICE_TURKS 0x000100
+#define OCL_DEVICE_BARTS 0x000200
+#define OCL_DEVICE_CAYMAN 0x000400
+#define OCL_DEVICE_TAHITI 0x000800
+#define OCL_DEVICE_PITCAIRN 0x001000
+#define OCL_DEVICE_CAPEVERDE 0x002000
+#define OCL_DEVICE_TRINITY 0x004000
+#define OCL_DEVICE_DOGS 0x008000
+#define OCL_DEVICE_CATS 0x010000
+#define OCL_DEVICE_BUNNIES 0x020000
+#define OCL_DEVICE_CASPER 0x040000
+#define OCL_DEVICE_SLIMER 0x080000
+#define OCL_DEVICE_MICE 0x100000
+#define OCL_DEVICE_ALL 0xFFFFFF
/// The number of function ID's that are reserved for
/// internal compiler usage.
const unsigned int RESERVED_FUNCS = 1024;
-namespace llvm
-{
+namespace llvm {
class AMDILInstrPrinter;
class AMDILTargetMachine;
class FunctionPass;
@@ -122,7 +99,7 @@
FunctionPass*
createAMDILPointerManager(TargetMachine &TM, CodeGenOpt::Level OptLevel);
FunctionPass*
-createAMDILMachinePeephole(TargetMachine &TM, CodeGenOpt::Level OptLevel);
+createAMDILMachinePeephole();
/// Pre emit passes.
FunctionPass* createMachinePostDominatorTreePass();
@@ -138,8 +115,9 @@
createAMDILSwizzleEncoder(TargetMachine &TM, CodeGenOpt::Level OptLevel);
/// Instruction Emission Passes
-AMDILInstPrinter *createAMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI);
+AMDILInstPrinter *createAMDILInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI);
extern Target TheAMDILTarget;
} // end namespace llvm;
@@ -152,16 +130,74 @@
/// Include device information enumerations
#include "AMDILDeviceInfo.h"
-namespace llvm
-{
+namespace llvm {
+// AMDIL Instruction descriptor flags
+namespace AMDID {
+enum {
+ EMPTY = 0,
+ SEXTLOAD = 1,
+ ZEXTLOAD = 2,
+ LOAD = 3,
+ STORE = 4,
+ TRUNCATE = 5,
+ ATOMIC = 6,
+ ADDR64 = 7,
+ GLOBAL = 8,
+ PRIVATE = 9,
+ CONSTANT = 10,
+ CPOOL = 11,
+ REGION = 12,
+ LOCAL = 13,
+ GDS = 14,
+ LDS = 15,
+ CBMEM = 16,
+ SCRATCH = 17,
+ RAWUAV = 18,
+ ARENAUAV = 19,
+ IMAGE = 20,
+ INFO0 = 21,
+ INFO1 = 22,
+ TXLD = 23,
+ SEMA = 24,
+ APPEND = 25,
+ SWSEXTLD = 26,
+ LOADCONST = 27,
+ IEEE = 28,
+ ZEROOP = 29,
+ FLAT = 30,
+ SWZLSHFT = 31,
+ SWZLDST = (SWZLSHFT + 0),
+ SWZLSRC0 = (SWZLSHFT + 1),
+ SWZLSRC1 = (SWZLSHFT + 2),
+ SWZLSRC2 = (SWZLSHFT + 3),
+ SWZLSRC3 = (SWZLSHFT + 4),
+ SWZLSRC4 = (SWZLSHFT + 5),
+ GWS = 37,
+ PACKED = 38,
+ SUB32BITS = 39,
+ TYPEI16 = 40,
+ TYPEV4 = 41,
+ VECTOR = 42
+}; // End anonymous enum.
+static const uint64_t SWZLMASK = (1ULL << SWZLDST) | (1ULL << SWZLSRC0)
+ | (1ULL << SWZLSRC1) | (1ULL << SWZLSRC2)
+ | (1ULL << SWZLSRC3) | (1ULL << SWZLSRC4);
+static const uint64_t AEXTLOAD = (1ULL << SEXTLOAD) | (1ULL << ZEXTLOAD);
+static const uint64_t INFO = (1ULL << INFO0) | (1ULL << INFO1);
+static const uint64_t EXTLOAD = AEXTLOAD | (1ULL << SWSEXTLD);
+static const uint64_t TYPEMASK = (1ULL << TYPEI16) | (1ULL << TYPEV4);
+static const uint64_t TYPEV2I8 = 0ULL;
+static const uint64_t TYPEV2I16 = (1ULL << TYPEI16);
+static const uint64_t TYPEV4I8 = (1ULL << TYPEV4);
+static const uint64_t TYPEV4I16 = TYPEMASK;
+} // end AMDID namespace.
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory,
/// however on the GPU, each address space points to
/// a seperate piece of memory that is unique from other
/// memory locations.
-namespace AMDILAS
-{
+namespace AMDILAS {
enum AddressSpaces {
PRIVATE_ADDRESS = 0, // Address space for private memory.
GLOBAL_ADDRESS = 1, // Address space for global memory.
@@ -185,37 +221,40 @@
struct {
#ifdef __BIG_ENDIAN__
unsigned short CacheableRead : 1; // Flag to specify if the read is
- // cacheable. (Permanent)
+ // cacheable. (Permanent)
unsigned short HardwareInst : 1; // Flag to specify that this instruction
- // is a hardware instruction. (Permanent)
+ // is a hardware instruction. (Permanent)
unsigned short ResourceID : 10; // Flag to specify the resource ID for
- // the op. (Permanent)
+ // the op. (Permanent)
unsigned short PointerPath : 1; // Flag to specify if the op is on the
- // pointer path.
+ // pointer path.
unsigned short ByteStore : 1; // Flag to specify if the op is byte
- // store op.
+ // store op.
unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
- // a conflict.
+ // a conflict.
unsigned short isImage : 1; // Reserved for future use.
#else
unsigned short isImage : 1; // Reserved for future use/llvm.
unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
- // conflict.
+ // conflict.
unsigned short ByteStore : 1; // Flag to specify if the op is a byte
- // store op.
+ // store op.
unsigned short PointerPath : 1; // Flag to specify if the op is on the
- // pointer path.
+ // pointer path.
unsigned short ResourceID : 10; // Flag to specify the resourece ID for
- // the op. (Permanent)
+ // the op. (Permanent)
unsigned short HardwareInst : 1; // Flag to specify that this instruction
- // is a hardware instruction. (Permanent)
+ // is a hardware instruction. (Permanent)
unsigned short CacheableRead : 1; // Flag to specify if the read is
- // cacheable. (Permanent)
+ // cacheable. (Permanent)
#endif
} bits;
unsigned short u16all;
-} InstrResEnc;
+ ResourceRec() {
+ u16all = 0;
+ }
+} InstrResEnc;
} // namespace AMDILAS
// The OpSwizzle encodes a subset of all possible
@@ -238,81 +277,5 @@
} bits;
unsigned char u8all;
} OpSwizzle;
-// Enums corresponding to AMDIL condition codes for IL. These
-// values must be kept in sync with the ones in the .td file.
-namespace AMDILCC
-{
-enum CondCodes {
- // AMDIL specific condition codes. These correspond to the IL_CC_*
- // in AMDILInstrInfo.td and must be kept in the same order.
- IL_CC_D_EQ = 0, // DEQ instruction.
- IL_CC_D_GE = 1, // DGE instruction.
- IL_CC_D_LT = 2, // DLT instruction.
- IL_CC_D_NE = 3, // DNE instruction.
- IL_CC_F_EQ = 4, // EQ instruction.
- IL_CC_F_GE = 5, // GE instruction.
- IL_CC_F_LT = 6, // LT instruction.
- IL_CC_F_NE = 7, // NE instruction.
- IL_CC_I_EQ = 8, // IEQ instruction.
- IL_CC_I_GE = 9, // IGE instruction.
- IL_CC_I_LT = 10, // ILT instruction.
- IL_CC_I_NE = 11, // INE instruction.
- IL_CC_U_GE = 12, // UGE instruction.
- IL_CC_U_LT = 13, // ULE instruction.
- // Pseudo IL Comparison instructions here.
- IL_CC_F_GT = 14, // GT instruction.
- IL_CC_U_GT = 15,
- IL_CC_I_GT = 16,
- IL_CC_D_GT = 17,
- IL_CC_F_LE = 18, // LE instruction
- IL_CC_U_LE = 19,
- IL_CC_I_LE = 20,
- IL_CC_D_LE = 21,
- IL_CC_F_UNE = 22,
- IL_CC_F_UEQ = 23,
- IL_CC_F_ULT = 24,
- IL_CC_F_UGT = 25,
- IL_CC_F_ULE = 26,
- IL_CC_F_UGE = 27,
- IL_CC_F_ONE = 28,
- IL_CC_F_OEQ = 29,
- IL_CC_F_OLT = 30,
- IL_CC_F_OGT = 31,
- IL_CC_F_OLE = 32,
- IL_CC_F_OGE = 33,
- IL_CC_D_UNE = 34,
- IL_CC_D_UEQ = 35,
- IL_CC_D_ULT = 36,
- IL_CC_D_UGT = 37,
- IL_CC_D_ULE = 38,
- IL_CC_D_UGE = 39,
- IL_CC_D_ONE = 40,
- IL_CC_D_OEQ = 41,
- IL_CC_D_OLT = 42,
- IL_CC_D_OGT = 43,
- IL_CC_D_OLE = 44,
- IL_CC_D_OGE = 45,
- IL_CC_U_EQ = 46,
- IL_CC_U_NE = 47,
- IL_CC_F_O = 48,
- IL_CC_D_O = 49,
- IL_CC_F_UO = 50,
- IL_CC_D_UO = 51,
- IL_CC_L_LE = 52,
- IL_CC_L_GE = 53,
- IL_CC_L_EQ = 54,
- IL_CC_L_NE = 55,
- IL_CC_L_LT = 56,
- IL_CC_L_GT = 57,
- IL_CC_UL_LE = 58,
- IL_CC_UL_GE = 59,
- IL_CC_UL_EQ = 60,
- IL_CC_UL_NE = 61,
- IL_CC_UL_LT = 62,
- IL_CC_UL_GT = 63,
- COND_ERROR = 64
-};
-
-} // end namespace AMDILCC
} // end namespace llvm
#endif // AMDIL_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL789IOExpansion.cpp Wed Sep 12 12:43:34 2012
@@ -28,43 +28,32 @@
#include <cstdio>
using namespace llvm;
-AMDIL789IOExpansion::AMDIL789IOExpansion(TargetMachine &tm,
- CodeGenOpt::Level OptLevel)
- : AMDILIOExpansion(tm, OptLevel)
-{
-}
-
-AMDIL789IOExpansion::~AMDIL789IOExpansion()
-{
-}
-const char *AMDIL789IOExpansion::getPassName() const
-{
- return "AMDIL 789 IO Expansion Pass";
-}
// This code produces the following pseudo-IL:
// cmov_logical r1006.x___, r1008.y, r1006.y, r1006.x
// cmov_logical r1006.x___, r1008.z, r1006.x, r1006.z
// cmov_logical $dst.x___, r1008.w, r1006.x, r1006.w
void
-AMDIL789IOExpansion::emitComponentExtract(MachineInstr *MI,
- unsigned src, unsigned dst, bool before)
+AMDIL789IOExpansionImpl::emitComponentExtract(MachineInstr *MI,
+ unsigned src,
+ unsigned dst,
+ bool before)
{
DebugLoc DL = MI->getDebugLoc();
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32),
- (src - AMDIL::R1) + AMDIL::Rx1)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr),
+ getCompReg(src, sub_x_comp))
.addReg(AMDIL::Ry1008)
- .addReg((src - AMDIL::R1) + AMDIL::Ry1)
- .addReg((src - AMDIL::R1) + AMDIL::Rx1);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32),
- (src - AMDIL::R1) + AMDIL::Rx1)
+ .addReg(getCompReg(src, sub_y_comp))
+ .addReg(getCompReg(src, sub_x_comp));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr),
+ getCompReg(src, sub_x_comp))
.addReg(AMDIL::Rz1008)
- .addReg((src - AMDIL::R1) + AMDIL::Rz1)
- .addReg((src - AMDIL::R1) + AMDIL::Rx1);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), dst)
+ .addReg(getCompReg(src, sub_z_comp))
+ .addReg(getCompReg(src, sub_x_comp));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), dst)
.addReg(AMDIL::Rw1008)
- .addReg((src - AMDIL::R1) + AMDIL::Rw1)
- .addReg((src - AMDIL::R1) + AMDIL::Rx1);
+ .addReg(getCompReg(src, sub_w_comp))
+ .addReg(getCompReg(src, sub_x_comp));
}
// We have a 128 bit load but a 8/16/32bit value, so we need to
// select the correct component and make sure that the correct
@@ -72,68 +61,75 @@
// extract from the component the correct bits and for 32 bits
// we just need to select the correct component.
void
-AMDIL789IOExpansion::emitDataLoadSelect(MachineInstr *MI)
+AMDIL789IOExpansionImpl::emitDataLoadSelect(MachineInstr *MI,
+ uint32_t &dataReg,
+ uint32_t &addyReg)
{
DebugLoc DL = MI->getDebugLoc();
- emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
if (getMemorySize(MI) == 1) {
+ emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
// This produces the following pseudo-IL:
- // iand r1006.x___, r1010.xxxx, l14.xxxx
+ // iand r1006.x___, addyReg.xxxx, l14.xxxx
// iadd r1006, r1006.x, {0, -1, 2, 3}
// ieq r1008, r1006, 0
// ishr r1011, r1011.x, {0, 8, 16, 24}
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1006)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1006)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1006)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1006)
.addReg(AMDIL::Rx1006)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::EQv4i32rr), AMDIL::R1008)
.addReg(AMDIL::R1006)
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRv4i32i32rr), AMDIL::R1011)
.addReg(AMDIL::Rx1011)
.addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
- emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
+ emitComponentExtract(MI, AMDIL::R1011, dataReg, false);
} else if (getMemorySize(MI) == 2) {
+ emitComponentExtract(MI, AMDIL::R1011, AMDIL::Rx1011, false);
// This produces the following pseudo-IL:
- // ishr r1007.x___, r1010.xxxx, 1
+ // ishr r1007.x___, addyReg.xxxx, 1
// iand r1008.x___, r1007.xxxx, 1
// ishr r1007.x___, r1011.xxxx, 16
// cmov_logical r1011.x___, r1008.xxxx, r1007.xxxx, r1011.xxxx
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1007)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1007)
.addReg(AMDIL::Rx1011)
.addImm(mMFI->addi32Literal(16));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), dataReg)
.addReg(AMDIL::Rx1008)
.addReg(AMDIL::Rx1007)
.addReg(AMDIL::Rx1011);
+ } else {
+ emitComponentExtract(MI, AMDIL::R1011, dataReg, false);
}
}
// This function does address calculations modifications to load from a vector
// register type instead of a dword addressed load.
void
-AMDIL789IOExpansion::emitVectorAddressCalc(MachineInstr *MI, bool is32bit, bool needsSelect)
+AMDIL789IOExpansionImpl::emitVectorAddressCalc(MachineInstr *MI, bool is32bit,
+ bool needsSelect,
+ uint32_t &addyReg)
{
DebugLoc DL = MI->getDebugLoc();
// This produces the following pseudo-IL:
// ishr r1007.x___, r1010.xxxx, (is32bit) ? 2 : 3
// iand r1008.x___, r1007.xxxx, (is32bit) ? 3 : 1
// ishr r1007.x___, r1007.xxxx, (is32bit) ? 2 : 1
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal((is32bit) ? 0x2 : 3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1007)
.addImm(mMFI->addi32Literal((is32bit) ? 3 : 1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1007)
.addReg(AMDIL::Rx1007)
.addImm(mMFI->addi32Literal((is32bit) ? 2 : 1));
if (needsSelect) {
@@ -141,75 +137,79 @@
// pseudo-IL is produced.
// iadd r1008, r1008.x, (is32bit) ? {0, -1, -2, -3} : {0, 0, -1, -1}
// ieq r1008, r1008, 0
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi128Literal((is32bit) ? 0xFFFFFFFFULL << 32 : 0ULL,
- (is32bit) ? 0xFFFFFFFEULL | (0xFFFFFFFDULL << 32) :
+ (is32bit) ? 0xFFFFFFFEULL |
+ (0xFFFFFFFDULL << 32) :
-1ULL));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::EQv4i32rr), AMDIL::R1008)
.addReg(AMDIL::R1008)
.addImm(mMFI->addi32Literal(0));
}
+ addyReg = AMDIL::Rx1007;
}
// This function emits a switch statement and writes 32bit/64bit
// value to a 128bit vector register type.
void
-AMDIL789IOExpansion::emitVectorSwitchWrite(MachineInstr *MI, bool is32bit)
+AMDIL789IOExpansionImpl::emitVectorSwitchWrite(MachineInstr *MI,
+ bool is32bit,
+ uint32_t &addyReg,
+ uint32_t &dataReg)
{
uint32_t xID = getPointerID(MI);
- assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ assert(
+ xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
// This section generates the following pseudo-IL:
// switch r1008.x
// default
- // mov x1[r1007.x].(is32bit) ? x___ : xy__, r1011.x{y}
+ // mov x1[$addyReg.x].(is32bit) ? x___ : xy__, r1011.x{y}
// break
// case 1
- // mov x1[r1007.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
+ // mov x1[$addyReg.x].(is32bit) ? _y__ : __zw, r1011.x{yxy}
// break
// if is32bit is true, case 2 and 3 are emitted.
// case 2
- // mov x1[r1007.x].__z_, r1011.x
+ // mov x1[$addyReg.x].__z_, r1011.x
// break
// case 3
- // mov x1[r1007.x].___w, r1011.x
+ // mov x1[$addyReg.x].___w, r1011.x
// break
// endswitch
DebugLoc DL = MI->getDebugLoc();
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SWITCH))
.addReg(AMDIL::Rx1008);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DEFAULT));
- BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHSTORE),
- (is32bit) ? AMDIL::Rx1007 : AMDIL::Rxy1007)
- .addReg((is32bit) ? AMDIL::Rx1011 : AMDIL::Rxy1011)
- .addImm(xID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SCRATCH32STORE),
+ (is32bit ? addyReg
+ : (addyReg - AMDIL::Rx1) + AMDIL::Rxy1))
+ .addReg(dataReg).addImm(xID);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(1);
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHSTORE),
- (is32bit) ? AMDIL::Ry1007 : AMDIL::Rzw1007)
- .addReg(is32bit ? AMDIL::Rx1011 : AMDIL::Rxy1011)
- .addImm(xID);
+ mTII->get(AMDIL::SCRATCH32STORE),
+ ( is32bit ? (addyReg - AMDIL::Rx1) + AMDIL::Ry1
+ : (addyReg - AMDIL::Rx1) + AMDIL::Rzw1))
+ .addReg(dataReg).addImm(xID);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
if (is32bit) {
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(2);
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rz1007)
- .addReg(AMDIL::Rx1011)
- .addImm(xID);
+ mTII->get(AMDIL::SCRATCH32STORE),
+ (addyReg - AMDIL::Rx1) + AMDIL::Rz1)
+ .addReg(dataReg).addImm(xID);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CASE)).addImm(3);
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rw1007)
- .addReg(AMDIL::Rx1011)
- .addImm(xID);
+ mTII->get(AMDIL::SCRATCH32STORE),
+ (addyReg - AMDIL::Rx1) + AMDIL::Rw1)
+ .addReg(dataReg).addImm(xID);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BREAK));
}
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ENDSWITCH));
-
}
void
-AMDIL789IOExpansion::expandPrivateLoad(MachineInstr *MI)
+AMDIL789IOExpansionImpl::expandPrivateLoad(MachineInstr *MI)
{
bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
@@ -219,14 +219,18 @@
mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
}
uint32_t xID = getPointerID(MI);
- assert(xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
+ assert(
+ xID && "Found a scratch load that was incorrectly marked as zero ID!\n");
if (!xID) {
xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t origReg = MI->getOperand(1).getReg();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
switch (getMemorySize(MI)) {
default:
// Since the private register is a 128 bit aligned, we have to align the address
@@ -235,61 +239,60 @@
// ishr r1010.x___, r1010.xxxx, 4
// mov r1011, x1[r1010.x]
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(4));
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
+ mTII->get(AMDIL::SCRATCH32LOAD), dataReg)
.addReg(AMDIL::Rx1010)
.addImm(xID);
break;
case 1:
case 2:
case 4:
- emitVectorAddressCalc(MI, true, true);
+ emitVectorAddressCalc(MI, true, true, addyReg);
// This produces the following pseudo-IL:
// mov r1011, x1[r1007.x]
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
- .addReg(AMDIL::Rx1007)
+ mTII->get(AMDIL::SCRATCH32LOAD), AMDIL::R1011)
+ .addReg(addyReg)
.addImm(xID);
// These instructions go after the current MI.
- emitDataLoadSelect(MI);
+ emitDataLoadSelect(MI, dataReg, origReg);
break;
case 8:
- emitVectorAddressCalc(MI, false, true);
+ emitVectorAddressCalc(MI, false, true, addyReg);
// This produces the following pseudo-IL:
// mov r1011, x1[r1007.x]
// cmov_logical r1011.xy__, r1008.xxxx, r1011.xy, r1011.zw
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1011)
- .addReg(AMDIL::Rx1007)
+ mTII->get(AMDIL::SCRATCH32LOAD), AMDIL::R1011)
+ .addReg(addyReg)
.addImm(xID);
- // These instructions go after the current MI.
- BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::CMOVLOG_i64), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1008)
- .addReg(AMDIL::Rxy1011)
- .addReg(AMDIL::Rzw1011);
+ if (isExtLoadInst(MI)
+ && MI->getDesc().OpInfo[0].RegClass
+ == AMDIL::GPRV2F64RegClassID) {
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SELECTi64rrr),
+ getCompReg(dataReg, sub_xy_comp))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rxy1011)
+ .addReg(AMDIL::Rzw1011);
+ } else {
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::SELECTi64rrr), dataReg)
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rxy1011)
+ .addReg(AMDIL::Rzw1011);
+ }
break;
}
- unsigned dataReg;
- expandPackedData(MI);
- dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
-
-
void
-AMDIL789IOExpansion::expandConstantLoad(MachineInstr *MI)
+AMDIL789IOExpansionImpl::expandConstantLoad(MachineInstr *MI)
{
if (!isHardwareInst(MI) || MI->memoperands_empty()) {
return expandGlobalLoad(MI);
@@ -303,74 +306,67 @@
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t origReg = MI->getOperand(1).getReg();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
switch (getMemorySize(MI)) {
default:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(4));
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
+ mTII->get(AMDIL::CB32LOAD), dataReg)
.addReg(AMDIL::Rx1010)
.addImm(cID);
break;
case 1:
case 2:
case 4:
- emitVectorAddressCalc(MI, true, true);
+ emitVectorAddressCalc(MI, true, true, addyReg);
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
- .addReg(AMDIL::Rx1007)
+ mTII->get(AMDIL::CB32LOAD), AMDIL::R1011)
+ .addReg(addyReg)
.addImm(cID);
- // These instructions go after the current MI.
- emitDataLoadSelect(MI);
+ emitDataLoadSelect(MI, dataReg, origReg);
break;
case 8:
- emitVectorAddressCalc(MI, false, true);
+ emitVectorAddressCalc(MI, false, true, addyReg);
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::CBLOAD), AMDIL::R1011)
- .addReg(AMDIL::Rx1007)
+ mTII->get(AMDIL::CB32LOAD), AMDIL::R1011)
+ .addReg(addyReg)
.addImm(cID);
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::CMOVLOG_i64), AMDIL::Rxy1011)
+ mTII->get(AMDIL::SELECTi64rrr), dataReg)
.addReg(AMDIL::Rx1008)
.addReg(AMDIL::Rxy1011)
.addReg(AMDIL::Rzw1011);
break;
}
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
-
void
-AMDIL789IOExpansion::expandConstantPoolLoad(MachineInstr *MI)
+AMDIL789IOExpansionImpl::expandConstantPoolLoad(MachineInstr *MI)
{
if (!isStaticCPLoad(MI)) {
return expandConstantLoad(MI);
} else {
+ uint32_t dataReg = MI->getOperand(0).getReg();
uint32_t idx = MI->getOperand(1).getIndex();
const MachineConstantPool *MCP = MI->getParent()->getParent()
->getConstantPool();
const std::vector<MachineConstantPoolEntry> &consts
- = MCP->getConstants();
+ = MCP->getConstants();
const Constant *C = consts[idx].Val.ConstVal;
- emitCPInst(MI, C, mKM, 0, isExtendLoad(MI));
+ emitCPInst(MI, C, mKM, 0, isExtendLoad(MI), dataReg);
}
}
-
void
-AMDIL789IOExpansion::expandPrivateStore(MachineInstr *MI)
+AMDIL789IOExpansionImpl::expandPrivateStore(MachineInstr *MI)
{
bool HWPrivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
if (!HWPrivate || mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV)) {
@@ -380,35 +376,44 @@
mMFI->addErrorMsg(amd::CompilerErrorMessage[MEMOP_NO_ALLOCATION]);
}
uint32_t xID = getPointerID(MI);
- assert(xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
+ assert(
+ xID && "Found a scratch store that was incorrectly marked as zero ID!\n");
if (!xID) {
xID = mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID);
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t dataReg = MI->getOperand(0).getReg();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t origReg = addyReg;
+ if (origReg == AMDIL::DFP) {
+ BuildMI(*mBB, MI, DL, mTII->get(TargetOpcode::COPY), AMDIL::Rx1010).addReg(
+ addyReg);
+ addyReg = origReg = AMDIL::Rx1010;
+ }
// These instructions go before the current MI.
- expandStoreSetupCode(MI);
+ expandStoreSetupCode(MI, addyReg, dataReg);
switch (getMemorySize(MI)) {
default:
// This section generates the following pseudo-IL:
// ishr r1010.x___, r1010.xxxx, 4
// mov x1[r1010.x], r1011
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SHR_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(4));
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::R1011)
+ mTII->get(AMDIL::SCRATCH32STORE), AMDIL::Rx1010)
+ .addReg(dataReg)
.addImm(xID);
break;
case 1:
- emitVectorAddressCalc(MI, true, true);
+ emitVectorAddressCalc(MI, true, true, addyReg);
// This section generates the following pseudo-IL:
// mov r1002, x1[r1007.x]
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
- .addReg(AMDIL::Rx1007)
+ mTII->get(AMDIL::SCRATCH32LOAD), AMDIL::R1002)
+ .addReg(addyReg)
.addImm(xID);
emitComponentExtract(MI, AMDIL::R1002, AMDIL::Rx1002, true);
// This section generates the following pseudo-IL:
@@ -417,22 +422,22 @@
// ieq r1001, r1001, 0
// ishr r1002, r1002.x, {0, 8, 16, 24}
// cmov_logical r1002, r1001, r1011.x, r1002
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1003)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1003)
+ .addReg(origReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1001)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1001)
.addReg(AMDIL::Rx1003)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1001)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::EQv4i32rr), AMDIL::R1001)
.addReg(AMDIL::R1001)
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v4i32), AMDIL::R1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRv4i32i32rr), AMDIL::R1002)
.addReg(AMDIL::Rx1002)
.addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_v4i32), AMDIL::R1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTv4i32rrr), AMDIL::R1002)
.addReg(AMDIL::R1001)
- .addReg(AMDIL::Rx1011)
+ .addReg(dataReg)
.addReg(AMDIL::R1002);
if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
// This section generates the following pseudo-IL:
@@ -440,15 +445,15 @@
// ishl r1002, r1002, {0, 8, 16, 24}
// ior r1002.xy, r1002.xy, r1002.zw
// ior r1011.x, r1002.x, r1002.y
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDv4i32rr), AMDIL::R1002)
.addReg(AMDIL::R1002)
.addImm(mMFI->addi32Literal(0xFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLv4i32i32rr), AMDIL::R1002)
.addReg(AMDIL::R1002)
.addImm(mMFI->addi128Literal(8ULL << 32, 16ULL | (24ULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::Rxy1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv2i64rr), AMDIL::Rxy1002)
.addReg(AMDIL::Rxy1002).addReg(AMDIL::Rzw1002);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv2i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Ry1002).addReg(AMDIL::Rx1002);
} else {
// This section generates the following pseudo-IL:
@@ -456,55 +461,57 @@
// mov r1002.xy, r1002.xz
// ubit_insert r1002.xy, 8, 8, r1001.xy, r1002.xy
// ubit_insert r1011.x, 16, 16, r1002.y, r1002.x
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1001)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHIv2i64r), AMDIL::Rxy1001)
.addReg(AMDIL::R1002);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1002)
.addReg(AMDIL::R1002);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::Rxy1002)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UBIT_INSERTv2i32rrrr), AMDIL::Rxy1002)
.addImm(mMFI->addi32Literal(8))
.addImm(mMFI->addi32Literal(8))
.addReg(AMDIL::Rxy1001)
.addReg(AMDIL::Rxy1002);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERTi32rrrr), AMDIL::Rx1011)
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Ry1002)
.addReg(AMDIL::Rx1002);
}
- emitVectorAddressCalc(MI, true, false);
- emitVectorSwitchWrite(MI, true);
+ dataReg = AMDIL::Rx1011;
+ emitVectorAddressCalc(MI, true, false, origReg);
+ emitVectorSwitchWrite(MI, true, origReg, dataReg);
break;
case 2:
- emitVectorAddressCalc(MI, true, true);
+ emitVectorAddressCalc(MI, true, true, addyReg);
// This section generates the following pseudo-IL:
// mov r1002, x1[r1007.x]
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::SCRATCHLOAD), AMDIL::R1002)
- .addReg(AMDIL::Rx1007)
+ mTII->get(AMDIL::SCRATCH32LOAD), AMDIL::R1002)
+ .addReg(addyReg)
.addImm(xID);
emitComponentExtract(MI, AMDIL::R1002, AMDIL::Rx1002, true);
// This section generates the following pseudo-IL:
- // ishr r1003.x, r1010.x, 1
+ // ishr r1003.x, $origReg, 1
// iand r1003.x, r1003.x, 1
// ishr r1001.x, r1002.x, 16
- // cmov_logical r1002.x, r1003.x, r1002.x, r1011.x
- // cmov_logical r1001.x, r1003.x, r1011.x, r1001.x
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1003)
- .addReg(AMDIL::Rx1010)
+ // cmov_logical r1002.x, r1003.x, r1002.x, $origReg
+ // cmov_logical r1001.x, r1003.x, $origReg, r1001.x
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1003)
+ .addReg(origReg)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1003)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1003)
.addReg(AMDIL::Rx1003)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1001)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1001)
.addReg(AMDIL::Rx1002)
.addImm(mMFI->addi32Literal(16));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1002)
.addReg(AMDIL::Rx1003)
.addReg(AMDIL::Rx1002)
- .addReg(AMDIL::Rx1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1001)
+ .addReg(dataReg);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1001)
.addReg(AMDIL::Rx1003)
- .addReg(AMDIL::Rx1011)
+ .addReg(dataReg)
.addReg(AMDIL::Rx1001);
if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
// This section generates the following pseudo-IL:
@@ -512,165 +519,150 @@
// iand r1001.x, r1001.x, 0xFFFF
// ishl r1001.x, r1002.x, 16
// ior r1011.x, r1002.x, r1001.x
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1002)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1002)
.addReg(AMDIL::Rx1002)
.addImm(mMFI->addi32Literal(0xFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1001)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1001)
.addReg(AMDIL::Rx1001)
.addImm(mMFI->addi32Literal(0xFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1001)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1001)
.addReg(AMDIL::Rx1001)
.addImm(mMFI->addi32Literal(16));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_OR_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ORi32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1002).addReg(AMDIL::Rx1001);
-
+ dataReg = AMDIL::Rx1011;
} else {
// This section generates the following pseudo-IL:
// ubit_insert r1011.x, 16, 16, r1001.x, r1002.x
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERTi32rrrr), AMDIL::Rx1011)
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Rx1001)
.addReg(AMDIL::Rx1002);
+ dataReg = AMDIL::Rx1011;
}
- emitVectorAddressCalc(MI, true, false);
- emitVectorSwitchWrite(MI, true);
+ emitVectorAddressCalc(MI, true, false, origReg);
+ emitVectorSwitchWrite(MI, true, origReg, dataReg);
break;
case 4:
- emitVectorAddressCalc(MI, true, false);
- emitVectorSwitchWrite(MI, true);
+ emitVectorAddressCalc(MI, true, false, addyReg);
+ emitVectorSwitchWrite(MI, true, addyReg, dataReg);
break;
case 8:
- emitVectorAddressCalc(MI, false, false);
- emitVectorSwitchWrite(MI, false);
+ emitVectorAddressCalc(MI, false, false, addyReg);
+ emitVectorSwitchWrite(MI, false, addyReg, dataReg);
break;
};
}
void
-AMDIL789IOExpansion::expandStoreSetupCode(MachineInstr *MI)
+AMDIL789IOExpansionImpl::expandStoreSetupCode(MachineInstr *MI,
+ uint32_t &addyReg,
+ uint32_t &dataReg)
{
DebugLoc DL;
- bool is64bit = is64bitLSOp(TM, MI);
- uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
- uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
- uint32_t moveInst = (is64bit) ? AMDIL::MOVE_i64 : AMDIL::MOVE_i32;
+ bool is64bit = is64bitLSOp(MI);
if (MI->getOperand(0).isUndef()) {
- BuildMI(*mBB, MI, DL, mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(TargetOpcode::COPY), dataReg)
.addImm(mMFI->addi32Literal(0));
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)), AMDIL::R1011)
- .addReg(MI->getOperand(0).getReg());
}
- expandTruncData(MI);
+ expandTruncData(MI, dataReg);
if (MI->getOperand(2).isReg()) {
- BuildMI(*mBB, MI, DL, mTII->get(addInst), addyReg)
- .addReg(MI->getOperand(1).getReg())
+ uint32_t newReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+ uint32_t addInst = (is64bit) ? AMDIL::ADDi64rr : AMDIL::ADDi32rr;
+ BuildMI(*mBB, MI, DL, mTII->get(addInst), newReg)
+ .addReg(addyReg)
.addReg(MI->getOperand(2).getReg());
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(moveInst), addyReg)
- .addReg(MI->getOperand(1).getReg());
+ addyReg = newReg;
}
- expandAddressCalc(MI);
- expandPackedData(MI);
+ expandAddressCalc(MI, addyReg);
+ expandPackedData(MI, dataReg);
}
-
-
void
-AMDIL789IOExpansion::expandPackedData(MachineInstr *MI)
+AMDIL789IOExpansionImpl::expandPackedData(MachineInstr *MI, uint32_t &dataReg)
{
- if (!isPackedData(MI)) {
+ if (!isPackedInst(MI)) {
return;
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t packedReg = getPackedReg(dataReg, getPackedID(MI));
// If we have packed data, then the shift size is no longer
// the same as the load size and we need to adjust accordingly
switch(getPackedID(MI)) {
default:
break;
- case PACK_V2I8: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::Rxy1011)
- .addReg(AMDIL::Rxy1011)
-
+ case PACK_V2I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDv2i32rr), AMDIL::Rxy1011)
+ .addReg(dataReg)
.addImm(mMFI->addi64Literal(0xFFULL | (0xFFULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::Rxy1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLv2i32i32rr), AMDIL::Rxy1011)
.addReg(AMDIL::Rxy1011).addImm(mMFI->addi64Literal(8ULL << 32));
// TODO: HILO_BITOR can be removed and replaced with OR.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv2i32rr),
+ getCompReg(dataReg, sub_x_comp))
.addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
-
- }
- break;
- case PACK_V4I8: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
- .addReg(AMDIL::R1011)
+ break;
+ case PACK_V4I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDv4i32rr), AMDIL::R1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLv4i32i32rr), AMDIL::R1011)
.addReg(AMDIL::R1011)
.addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
// TODO: HILO_BITOR can be removed and replaced with OR.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i64), AMDIL::Rxy1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv2i64rr), AMDIL::Rxy1011)
.addReg(AMDIL::Rxy1011).addReg(AMDIL::Rzw1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv2i32rr),
+ getCompReg(dataReg, sub_x_comp))
.addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
-
- }
- break;
- case PACK_V2I16: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v2i32), AMDIL::Rxy1011)
- .addReg(AMDIL::Rxy1011)
-
+ break;
+ case PACK_V2I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDv2i32rr), AMDIL::Rxy1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v2i32), AMDIL::Rxy1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLv2i32i32rr), AMDIL::Rxy1011)
.addReg(AMDIL::Rxy1011)
-
.addImm(mMFI->addi64Literal(16ULL << 32));
// TODO: HILO_BITOR can be removed and replaced with OR.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v2i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv2i32rr),
+ getCompReg(dataReg, sub_x_comp))
.addReg(AMDIL::Rx1011).addReg(AMDIL::Ry1011);
-
- }
- break;
- case PACK_V4I16: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
- .addReg(AMDIL::R1011)
+ break;
+ case PACK_V4I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDv4i32rr), AMDIL::R1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_v4i32), AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLv4i32i32rr), AMDIL::R1011)
.addReg(AMDIL::R1011)
.addImm(mMFI->addi64Literal(16ULL << 32));
// TODO: HILO_BITOR can be removed and replaced with OR.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITOR_v4i16), AMDIL::Rxy1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::HILO_BITORv4i16rr),
+ getCompReg(dataReg, sub_xy_comp))
.addReg(AMDIL::Rxy1011).addReg(AMDIL::Rzw1011);
-
- }
- break;
+ break;
case UNPACK_V2I8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::Ry1011)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRi32i32rr),
+ getCompReg(dataReg, sub_y_comp))
+ .addReg(packedReg)
.addImm(mMFI->addi32Literal(8));
break;
- case UNPACK_V4I8: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_v4i8), AMDIL::R1011)
- .addReg(AMDIL::Rx1011)
+ case UNPACK_V4I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRv4i8i32rr), dataReg)
+ .addReg(packedReg)
.addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))));
- }
- break;
- case UNPACK_V2I16: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_i32), AMDIL::Ry1011)
- .addReg(AMDIL::Rx1011)
+ break;
+ case UNPACK_V2I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRi32i32rr),
+ getCompReg(dataReg, sub_y_comp))
+ .addReg(packedReg)
.addImm(mMFI->addi32Literal(16));
- }
- break;
- case UNPACK_V4I16: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRVEC_v2i32), AMDIL::Rxy1012)
- .addReg(AMDIL::Rxy1011)
-
+ break;
+ case UNPACK_V4I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::USHRv2i32i32rr), AMDIL::Rxy1012)
+ .addReg(packedReg)
.addImm(mMFI->addi32Literal(16));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
- .addReg(AMDIL::Rxy1011).addReg(AMDIL::Rxy1012);
- }
- break;
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEv2i64rr), dataReg)
+ .addReg(packedReg).addReg(AMDIL::Rxy1012);
+ break;
};
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.cpp Wed Sep 12 12:43:34 2012
@@ -42,7 +42,6 @@
: AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
{
}
-
AMDIL7XXAsmPrinter::~AMDIL7XXAsmPrinter()
{
}
@@ -82,7 +81,6 @@
}
emitMCallInst(MI, O, name);
}
-
bool
AMDIL7XXAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
{
@@ -100,7 +98,6 @@
EmitFunctionBody();
return false;
}
-
void
AMDIL7XXAsmPrinter::EmitInstruction(const MachineInstr *II)
{
@@ -109,7 +106,7 @@
formatted_raw_ostream O(OFunStr);
const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
if (mDebugMode) {
- O << ";" ;
+ O << ";";
II->print(O);
}
if (isMacroFunc(II)) {
@@ -159,7 +156,7 @@
} else {
printOperand(II, x
, O
- );
+ );
}
if (!x) {
O << "), (";
@@ -194,8 +191,6 @@
mMFI->addCalledIntr(macronum);
}
} else {
-
-
// Print the assembly for the instruction.
// We want to make sure that we do HW constants
// before we do arena segment
@@ -209,6 +204,8 @@
O << "mem0.x___, cb0[3].x, r0.0\n";
O << "\tendif\n";
mMFI->addMetadata(";memory:compilerwrite");
+ } else if (II->getOpcode() == AMDIL::COPY) {
+ printCopy(II, O);
} else {
printInstruction(II, O);
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXAsmPrinter.h Wed Sep 12 12:43:34 2012
@@ -35,7 +35,6 @@
//
virtual ~AMDIL7XXAsmPrinter();
-
void
EmitInstruction(const MachineInstr *MI);
@@ -56,7 +55,6 @@
//
virtual void
emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
-
-}; // AMDIL7XXAsmPrinter
+}; // AMDIL7XXAsmPrinter
} // end of llvm namespace
#endif // AMDIL_7XX_ASM_PRINTER_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.cpp Wed Sep 12 12:43:34 2012
@@ -31,16 +31,13 @@
mDeviceFlag = OCL_DEVICE_RV770;
}
}
-
AMDIL7XXDevice::~AMDIL7XXDevice()
{
}
-
void AMDIL7XXDevice::setCaps()
{
mSWBits.set(AMDILDeviceInfo::LocalMem);
}
-
size_t AMDIL7XXDevice::getMaxLDSSize() const
{
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
@@ -48,17 +45,14 @@
}
return 0;
}
-
size_t AMDIL7XXDevice::getWavefrontSize() const
{
return AMDILDevice::HalfWavefrontSize;
}
-
uint32_t AMDIL7XXDevice::getGeneration() const
{
return AMDILDeviceInfo::HD4XXX;
}
-
uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
{
switch (DeviceID) {
@@ -90,25 +84,20 @@
return 0;
}
-
uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
{
return 1;
}
-
FunctionPass*
-AMDIL7XXDevice::getIOExpansion(
- TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+AMDIL7XXDevice::getIOExpansion() const
{
- return new AMDIL7XXIOExpansion(TM, OptLevel);
+ return new AMDIL7XXIOExpansion();
}
-
AsmPrinter*
AMDIL7XXDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
{
return new AMDIL7XXAsmPrinter(ASM_PRINTER_ARGUMENTS);
}
-
FunctionPass*
AMDIL7XXDevice::getPointerManager(
TargetMachine& TM, CodeGenOpt::Level OptLevel) const
@@ -118,21 +107,18 @@
// more advanced tracking pass as it is possible to
// loose information through the stack. The EGPM
// pass tracks this, but the standard pass does not.
- return new AMDILEGPointerManager(TM, OptLevel);
+ return new AMDILEGPointerManager();
} else {
- return new AMDILPointerManager(TM, OptLevel);
+ return new AMDILPointerManager();
}
}
-
-AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
{
setCaps();
}
-
AMDIL770Device::~AMDIL770Device()
{
}
-
void AMDIL770Device::setCaps()
{
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
@@ -144,20 +130,16 @@
mSWBits.set(AMDILDeviceInfo::LongOps);
mSWBits.set(AMDILDeviceInfo::LocalMem);
}
-
size_t AMDIL770Device::getWavefrontSize() const
{
return AMDILDevice::WavefrontSize;
}
-
AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
{
}
-
AMDIL710Device::~AMDIL710Device()
{
}
-
size_t AMDIL710Device::getWavefrontSize() const
{
return AMDILDevice::QuarterWavefrontSize;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXDevice.h Wed Sep 12 12:43:34 2012
@@ -15,8 +15,7 @@
#define _AMDIL7XXDEVICEIMPL_H_
#include "AMDILDevice.h"
#include "AMDILSubtarget.h"
-namespace llvm
-{
+namespace llvm {
class AMDILSubtarget;
//===----------------------------------------------------------------------===//
@@ -27,8 +26,7 @@
// devices are derived from this class. The AMDIL7XX device will only
// support the minimal features that are required to be considered OpenCL 1.0
// compliant and nothing more.
-class AMDIL7XXDevice : public AMDILDevice
-{
+class AMDIL7XXDevice : public AMDILDevice {
public:
AMDIL7XXDevice(AMDILSubtarget *ST);
virtual ~AMDIL7XXDevice();
@@ -37,8 +35,7 @@
virtual uint32_t getGeneration() const;
virtual uint32_t getResourceID(uint32_t DeviceID) const;
virtual uint32_t getMaxNumUAVs() const;
- FunctionPass*
- getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+ FunctionPass* getIOExpansion() const;
AsmPrinter*
getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
FunctionPass*
@@ -52,8 +49,7 @@
// derivative cards. The difference between this device and the base
// class is this device device adds support for double precision
// and has a larger wavefront size.
-class AMDIL770Device : public AMDIL7XXDevice
-{
+class AMDIL770Device : public AMDIL7XXDevice {
public:
AMDIL770Device(AMDILSubtarget *ST);
virtual ~AMDIL770Device();
@@ -65,13 +61,11 @@
// The AMDIL710Device class derives from the 7XX base class, but this
// class is a smaller derivative, so we need to overload some of the
// functions in order to correctly specify this information.
-class AMDIL710Device : public AMDIL7XXDevice
-{
+class AMDIL710Device : public AMDIL7XXDevice {
public:
AMDIL710Device(AMDILSubtarget *ST);
virtual ~AMDIL710Device();
virtual size_t getWavefrontSize() const;
}; // AMDIL710Device
-
} // namespace llvm
#endif // _AMDILDEVICEIMPL_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDIL7XXIOExpansion.cpp Wed Sep 12 12:43:34 2012
@@ -28,113 +28,119 @@
#include <cstdio>
using namespace llvm;
-AMDIL7XXIOExpansion::AMDIL7XXIOExpansion(TargetMachine &tm,
- CodeGenOpt::Level OptLevel) : AMDIL789IOExpansion(tm, OptLevel)
+
+namespace llvm
{
+extern void initializeAMDIL7XXIOExpansionPass(PassRegistry&);
}
-AMDIL7XXIOExpansion::~AMDIL7XXIOExpansion()
+char AMDIL7XXIOExpansion::ID = 0;
+INITIALIZE_PASS(AMDIL7XXIOExpansion, "7xx-io-expansion",
+ "AMDIL 7XX IO Expansion", false, false);
+
+AMDIL7XXIOExpansion::AMDIL7XXIOExpansion()
+ : MachineFunctionPass(ID)
{
+ initializeAMDIL7XXIOExpansionPass(*PassRegistry::getPassRegistry());
}
const char *AMDIL7XXIOExpansion::getPassName() const
{
return "AMDIL 7XX IO Expansion Pass";
}
-
+bool AMDIL7XXIOExpansion::runOnMachineFunction(MachineFunction& MF)
+{
+ AMDIL7XXIOExpansionImpl impl(MF);
+ return impl.run();
+}
void
-AMDIL7XXIOExpansion::expandGlobalLoad(MachineInstr *MI)
+AMDIL7XXIOExpansionImpl::expandGlobalLoad(MachineInstr *MI)
{
DebugLoc DL = MI->getDebugLoc();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
uint32_t ID = getPointerID(MI);
mKM->setOutputInst();
switch(getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32), AMDIL::R1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADv4i32), dataReg)
+ .addReg(addyReg)
.addImm(ID);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADi32), dataReg)
+ .addReg(addyReg)
.addImm(ID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADv2i32), AMDIL::Rxy1011)
+ .addReg(addyReg)
.addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::COPY), dataReg)
+ .addReg(AMDIL::Rxy1011);
break;
case 1:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::EQv4i32rr), AMDIL::R1012)
.addReg(AMDIL::R1008)
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1012)
.addImm(mMFI->addi32Literal(0))
.addImm(mMFI->addi32Literal(24));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Ry1012)
.addImm(mMFI->addi32Literal(8))
.addReg(AMDIL::Rx1008);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rz1012)
.addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Rx1008);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADi32), AMDIL::Rx1011)
.addReg(AMDIL::Rx1010)
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i8), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi8i32rr), dataReg)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
break;
case 2:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADi32), AMDIL::Rx1011)
.addReg(AMDIL::Rx1010)
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi16i32rr), dataReg)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
break;
}
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
-
void
-AMDIL7XXIOExpansion::expandRegionLoad(MachineInstr *MI)
+AMDIL7XXIOExpansionImpl::expandRegionLoad(MachineInstr *MI)
{
bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
@@ -156,95 +162,95 @@
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1010)
+ .addReg(addyReg)
.addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_x_comp))
.addReg(AMDIL::Rx1010)
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_y_comp))
.addReg(AMDIL::Ry1010)
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rz1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_z_comp))
.addReg(AMDIL::Rz1010)
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rw1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_w_comp))
.addReg(AMDIL::Rw1010)
.addImm(gID);
break;
case 1:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMULi32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r), AMDIL::Rx1011)
.addReg(AMDIL::Rx1010)
.addImm(gID);
// The instruction would normally fit in right here so everything created
// after this point needs to go into the afterInst vector.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), dataReg)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
break;
case 2:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMULi32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r), AMDIL::Rx1011)
.addReg(AMDIL::Rx1010)
.addImm(gID);
// The instruction would normally fit in right here so everything created
// after this point needs to go into the afterInst vector.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), dataReg)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r), dataReg)
+ .addReg(addyReg)
.addImm(gID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv2i32rr), AMDIL::Rxy1010)
+ .addReg(addyReg)
.addImm(mMFI->addi64Literal(1ULL << 32));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_x_comp, sub_z_comp))
.addReg(AMDIL::Rx1010)
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_y_comp, sub_w_comp))
.addReg(AMDIL::Ry1010)
.addImm(gID);
break;
}
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
void
-AMDIL7XXIOExpansion::expandLocalLoad(MachineInstr *MI)
+AMDIL7XXIOExpansionImpl::expandLocalLoad(MachineInstr *MI)
{
bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
if (!HWLocal || !isHardwareLocal(MI)) {
@@ -260,120 +266,114 @@
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::R1011)
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADv4i32r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC), AMDIL::Rxy1011)
- .addReg(AMDIL::Rxy1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADv2i32r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi32r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
break;
case 1:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMULi32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi32r), AMDIL::Rx1011)
.addReg(AMDIL::Rx1010)
.addImm(lID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), dataReg)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
break;
case 2:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMULi32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi32r), AMDIL::Rx1011)
.addReg(AMDIL::Rx1010)
.addImm(lID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), dataReg)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
break;
}
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
-
void
-AMDIL7XXIOExpansion::expandGlobalStore(MachineInstr *MI)
+AMDIL7XXIOExpansionImpl::expandGlobalStore(MachineInstr *MI)
{
uint32_t ID = getPointerID(MI);
mKM->setOutputInst();
DebugLoc DL = MI->getDebugLoc();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandStoreSetupCode(MI);
+ expandStoreSetupCode(MI, addyReg, dataReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
- .addReg(AMDIL::R1010)
- .addReg(AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREv4i32), AMDIL::MEM)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
break;
case 1:
mMFI->addErrorMsg(
amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREi32), AMDIL::MEMx)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
break;
case 2:
mMFI->addErrorMsg(
amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREi32), AMDIL::MEMx)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREi32), AMDIL::MEMx)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rxy1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREv2i32), AMDIL::MEMxy)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
break;
};
}
-
void
-AMDIL7XXIOExpansion::expandRegionStore(MachineInstr *MI)
+AMDIL7XXIOExpansionImpl::expandRegionStore(MachineInstr *MI)
{
bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
@@ -396,109 +396,110 @@
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions go before the current MI.
- expandStoreSetupCode(MI);
+ expandStoreSetupCode(MI, addyReg, dataReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1010)
+ .addReg(addyReg)
.addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rx1010)
+ .addReg(getCompReg(dataReg, sub_x_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
- .addReg(AMDIL::Ry1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Ry1010)
+ .addReg(getCompReg(dataReg, sub_y_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rz1010)
- .addReg(AMDIL::Rz1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rz1010)
+ .addReg(getCompReg(dataReg, sub_z_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rw1010)
- .addReg(AMDIL::Rw1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rw1010)
+ .addReg(getCompReg(dataReg, sub_w_comp))
.addImm(gID);
break;
case 1:
mMFI->addErrorMsg(
amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1012)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMUL_i32), AMDIL::Rx1006)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UMULi32rr), AMDIL::Rx1006)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(0xFFFFFF00))
.addImm(mMFI->addi32Literal(0x00FFFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1007)
.addReg(AMDIL::Ry1008)
.addReg(AMDIL::Rx1007)
.addImm(mMFI->addi32Literal(0xFF00FFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1012)
.addReg(AMDIL::Rz1008)
.addReg(AMDIL::Rx1007)
.addImm(mMFI->addi32Literal(0xFFFF00FF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1007);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rx1010)
.addReg(AMDIL::Rx1011)
.addImm(gID);
break;
case 2:
mMFI->addErrorMsg(
amd::CompilerErrorMessage[BYTE_STORE_ERROR]);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0x0000FFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1012)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(0x0000FFFF))
.addImm(mMFI->addi32Literal(0xFFFF0000));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rx1010)
.addReg(AMDIL::Rx1011)
.addImm(gID);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), addyReg)
+ .addReg(dataReg)
.addImm(gID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv2i32rr), AMDIL::Rxy1010)
+ .addReg(addyReg)
.addImm(mMFI->addi64Literal(1ULL << 32));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rx1010)
+      .addReg(getCompReg(dataReg, sub_x_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
- .addReg(AMDIL::Ry1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Ry1010)
+ .addReg(getCompReg(dataReg, sub_y_comp))
.addImm(gID);
break;
};
}
-
void
-AMDIL7XXIOExpansion::expandLocalStore(MachineInstr *MI)
+AMDIL7XXIOExpansionImpl::expandLocalStore(MachineInstr *MI)
{
bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
if (!HWLocal || !isHardwareLocal(MI)) {
@@ -511,10 +512,11 @@
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
DebugLoc DL = MI->getDebugLoc();
- // These instructions go before the current MI.
- expandStoreSetupCode(MI);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC), AMDIL::MEM)
- .addReg(AMDIL::R1010)
- .addReg(AMDIL::R1011)
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
+ expandStoreSetupCode(MI, addyReg, dataReg);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32STOREv4i32r), AMDIL::MEM)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(lID);
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.cpp Wed Sep 12 12:43:34 2012
@@ -14,10 +14,9 @@
#include "AMDILAsmBackend.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm
-{
+namespace llvm {
ASM_BACKEND_CLASS* createAMDILAsmBackend(const ASM_BACKEND_CLASS &T,
- const std::string &TT)
+ const std::string &TT)
{
return new AMDILAsmBackend(T);
}
@@ -28,25 +27,21 @@
: ASM_BACKEND_CLASS()
{
}
-
MCObjectWriter *
AMDILAsmBackend::createObjectWriter(raw_ostream &OS) const
{
return 0;
}
-
bool
AMDILAsmBackend::doesSectionRequireSymbols(const MCSection &Section) const
{
return false;
}
-
bool
AMDILAsmBackend::isSectionAtomizable(const MCSection &Section) const
{
return true;
}
-
bool
AMDILAsmBackend::isVirtualSection(const MCSection &Section) const
{
@@ -59,38 +54,30 @@
uint64_t Value) const
{
}
-
bool
AMDILAsmBackend::mayNeedRelaxation(const MCInst &Inst
- ) const
+ ) const
{
return false;
}
-
bool
AMDILAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCInstFragment *DF,
- const MCAsmLayout &Layout) const
-{
+ const MCAsmLayout &Layout) const {
// Relax if the value is too big for a (signed) i8.
return int64_t(Value) != int64_t(int8_t(Value));
}
-
-
-
void
AMDILAsmBackend::relaxInstruction(const MCInst &Inst,
MCInst &Res) const
{
}
-
bool
AMDILAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const
{
return false;
}
-
unsigned
AMDILAsmBackend::getNumFixupKinds() const
{
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmBackend.h Wed Sep 12 12:43:34 2012
@@ -18,10 +18,8 @@
#define ASM_BACKEND_CLASS MCAsmBackend
using namespace llvm;
-namespace llvm
-{
-class AMDILAsmBackend : public ASM_BACKEND_CLASS
-{
+namespace llvm {
+class AMDILAsmBackend : public ASM_BACKEND_CLASS {
public:
AMDILAsmBackend(const ASM_BACKEND_CLASS &T);
virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const;
@@ -32,7 +30,7 @@
uint64_t Value) const;
virtual bool
mayNeedRelaxation(const MCInst &Inst
- ) const;
+ ) const;
virtual bool
fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
@@ -41,7 +39,7 @@
virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
unsigned getNumFixupKinds() const;
-}; // class AMDILAsmBackend;
+}; // class AMDILAsmBackend;
} // llvm namespace
#endif // _AMDIL_ASM_BACKEND_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.cpp Wed Sep 12 12:43:34 2012
@@ -57,21 +57,17 @@
const AMDILSubtarget *stm = &TM.getSubtarget<AMDILSubtarget>();
return stm->device()->getAsmPrinter(ASM_PRINTER_ARGUMENTS);
}
-
#include "AMDILGenAsmWriter.inc"
// Force static initialization
-extern "C" void LLVMInitializeAMDILAsmPrinter()
-{
+extern "C" void LLVMInitializeAMDILAsmPrinter() {
llvm::TargetRegistry::RegisterAsmPrinter(TheAMDILTarget,
- createAMDILCodePrinterPass);
+ createAMDILCodePrinterPass);
}
-
-AMDILInstPrinter *llvm::createAMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
-{
+AMDILInstPrinter *llvm::createAMDILInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
return new AMDILInstPrinter(MAI, MII, MRI);
}
-
//
// @param name
// @brief strips KERNEL_PREFIX and KERNEL_SUFFIX from the name
@@ -103,7 +99,6 @@
mMFI = NULL;
mAMI = NULL;
}
-
AMDILAsmPrinter::~AMDILAsmPrinter()
{
delete mMeta;
@@ -113,7 +108,6 @@
{
return "AMDIL Assembly Printer";
}
-
void
AMDILAsmPrinter::EmitInstruction(const MachineInstr *II)
{
@@ -122,7 +116,7 @@
formatted_raw_ostream O(OFunStr);
const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
if (mDebugMode) {
- O << ";" ;
+ O << ";";
II->print(O);
}
if (isMacroFunc(II)) {
@@ -172,7 +166,7 @@
} else {
printOperand(II, x
, O
- );
+ );
}
if (!x) {
O << "), (";
@@ -206,14 +200,25 @@
} else {
mMFI->addCalledIntr(macronum);
}
+ } else if (II->getOpcode() == AMDIL::COPY) {
+ printCopy(II, O);
} else {
-
printInstruction(II, O);
}
O.flush();
OutStreamer.EmitRawText(StringRef(FunStr));
}
void
+AMDILAsmPrinter::printCopy(const MachineInstr *MI,
+ OSTREAM_TYPE &O)
+{
+ O << "\tmov ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << "\n";
+}
+void
AMDILAsmPrinter::emitMacroFunc(const MachineInstr *MI,
OSTREAM_TYPE &O)
{
@@ -225,7 +230,6 @@
}
emitMCallInst(MI, O, name);
}
-
bool
AMDILAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
{
@@ -243,7 +247,6 @@
EmitFunctionBody();
return false;
}
-
void
AMDILAsmPrinter::addCPoolLiteral(const Constant *C)
{
@@ -261,11 +264,11 @@
if (CI->getBitWidth() == (int64_t)64) {
mMFI->addi64Literal(val);
} else if (CI->getBitWidth() == (int64_t)8) {
- mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i8);
+ mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONSTi8);
} else if (CI->getBitWidth() == (int64_t)16) {
- mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i16);
+ mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONSTi16);
} else {
- mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONST_i32);
+ mMFI->addi32Literal((uint32_t)val, AMDIL::LOADCONSTi32);
}
} else if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
uint32_t size = CA->getNumOperands();
@@ -273,9 +276,9 @@
addCPoolLiteral(CA->getOperand(x));
}
} else if (const ConstantAggregateZero *CAZ
- = dyn_cast<ConstantAggregateZero>(C)) {
+ = dyn_cast<ConstantAggregateZero>(C)) {
if (CAZ->isNullValue()) {
- mMFI->addi32Literal(0, AMDIL::LOADCONST_i32);
+ mMFI->addi32Literal(0, AMDIL::LOADCONSTi32);
mMFI->addi64Literal(0);
mMFI->addf64Literal((uint64_t)0);
mMFI->addf32Literal((uint32_t)0);
@@ -299,7 +302,6 @@
assert(0 && "Found a constant type that I don't know how to handle");
}
}
-
void
AMDILAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV)
{
@@ -317,12 +319,10 @@
O.flush();
OutStreamer.EmitRawText(O.str());
}
-
-
void
AMDILAsmPrinter::printOperand(const MachineInstr *MI, int opNum
, OSTREAM_TYPE &O
- )
+ )
{
const MachineOperand &MO = MI->getOperand (opNum);
@@ -334,13 +334,37 @@
// FIXME: we need to remove all virtual register creation after register allocation.
// This is a work-around to make sure that the virtual register range does not
// clobber the physical register range.
- O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+ O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI,
+ opNum);
} else if (opNum == 0
- && (opcode == AMDIL::SCRATCHSTORE
- ||opcode == AMDIL::SCRATCHSTORE64)) {
+ && isAtomicInst(MI) && isStoreInst(MI)) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ OpSwizzle swiz;
+ unsigned reg = MI->getOperand(2).getReg();
+ swiz.u8all = MO.getTargetFlags();
+ O << "mem0";
+ if (isXComponentReg(reg)) {
+ O << getDstSwizzle(AMDIL_DST_X___);
+ } else if (isYComponentReg(reg)) {
+ O << getDstSwizzle(AMDIL_DST__Y__);
+ } else if (isZComponentReg(reg)) {
+ O << getDstSwizzle(AMDIL_DST___Z_);
+ } else if (isWComponentReg(reg)) {
+ O << getDstSwizzle(AMDIL_DST____W);
+ } else if (isXYComponentReg(reg)) {
+ O << getDstSwizzle(AMDIL_DST_XY__);
+ } else if (isZWComponentReg(reg)) {
+ O << getDstSwizzle(AMDIL_DST___ZW);
+ } else {
+ O << getDstSwizzle(AMDIL_DST_DFLT);
+ }
+ O << ", " << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
+ } else if (opNum == 0
+ && isScratchInst(MI) && isStoreInst(MI)) {
O << getRegisterName(MO.getReg()) << ".x]";
- // If we aren't the vector register, print the dst swizzle.
- if (MI->getOperand(1).getReg() != AMDIL::R1011) {
+ uint32_t reg = MI->getOperand(1).getReg();
+ // If we aren't a vector register, print the dst swizzle.
+ if (reg < AMDIL::R1 || reg > AMDIL::R1012) {
O << getSwizzle(MI, opNum);
}
} else {
@@ -352,56 +376,10 @@
}
break;
case MachineOperand::MO_Immediate:
- case MachineOperand::MO_FPImmediate: {
- unsigned opcode = MI->getOpcode();
- if ((opNum == (int)(MI->getNumOperands() - 1))
- && ( (opcode >= AMDIL::ATOM_A_ADD
- && opcode <= AMDIL::ATOM_R_XOR_NORET_B64)
- || (opcode >= AMDIL::ATOM64_G_ADD
- && opcode <= AMDIL::ATOM64_R_XOR_NORET_B64)
- || opcode == AMDIL::SEMAPHORE_INIT
- || (opcode >= AMDIL::SCRATCHLOAD
- && opcode <= AMDIL::SCRATCHSTORE)
- || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
- || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE)
- || (opcode >= AMDIL::UAVARENALOAD_i16
- && opcode <= AMDIL::UAVRAWSTORE_v4i32)
- || opcode == AMDIL::CBLOAD
- || opcode == AMDIL::CASE)
- ) {
- O << MO.getImm();
- } else if (((opcode >= AMDIL::VEXTRACT_v2f32
- && opcode <= AMDIL::VEXTRACT_v4i8)
- && (opNum == 2))) {
- // The swizzle is encoded in the operand so the
- // literal that represents the swizzle out of ISel
- // can be ignored.
- } else if ((opcode >= AMDIL::VINSERT_v2f32)
- && (opcode <= AMDIL::VINSERT_v4i8)
- && ((opNum == 3) || (opNum == 4))) {
- // The swizzle is encoded in the operand so the
- // literal that represents the swizzle out of ISel
- // can be ignored.
- // The swizzle is encoded in the operand so the
- // literal that represents the swizzle out of ISel
- // can be ignored.
- } else if (opNum == 1 &&
- (isAppendInst(TM, MI)
- || isReadImageInst(TM, MI)
- || isImageTXLDInst(TM, MI)
- || opcode == AMDIL::CBLOAD)) {
- // We don't need to emit the 'l' so we just emit
- // the immediate as it stores the resource ID and
- // is not a true literal.
- O << MO.getImm();
- } else if (opNum == 0 &&
- (opcode == AMDIL::SEMAPHORE_INIT
- || opcode == AMDIL::SEMAPHORE_WAIT
- || opcode == AMDIL::SEMAPHORE_SIGNAL
- || isReadImageInst(TM, MI)
- || isWriteImageInst(TM, MI))) {
- O << MO.getImm();
- } else if (opNum == 3 && isReadImageInst(TM, MI)) {
+ case MachineOperand::MO_FPImmediate:
+ {
+ if (isSkippedLiteral(MI, opNum)) {
+ } else if (isBypassedLiteral(MI, opNum)) {
O << MO.getImm();
} else if (MO.isImm() || MO.isFPImm()) {
O << "l" << MO.getImm() << getSwizzle(MI, opNum);
@@ -415,7 +393,8 @@
case MachineOperand::MO_MachineBasicBlock:
EmitBasicBlockStart(MO.getMBB());
return;
- case MachineOperand::MO_GlobalAddress: {
+ case MachineOperand::MO_GlobalAddress:
+ {
int offset = 0;
const GlobalValue *gv = MO.getGlobal();
// Here we look up by the name for the corresponding number
@@ -431,11 +410,11 @@
} else if((offset = mAMI->getArrayOffset(gv->getName()))
!= -1) {
mMFI->setUsesLDS();
- O << "l" << mMFI->getIntLits(offset) << ".x";
+ O << "l" << mMFI->getLitIdx((uint32_t)offset) << ".x";
} else if((offset = mAMI->getConstOffset(gv->getName()))
!= -1) {
mMFI->addMetadata(";memory:datareqd");
- O << "l" << mMFI->getIntLits(offset) << ".x";
+ O << "l" << mMFI->getLitIdx((uint32_t)offset) << ".x";
mMFI->setUsesConstant();
} else {
assert(0 && "GlobalAddress without a function call!");
@@ -444,10 +423,11 @@
}
}
break;
- case MachineOperand::MO_ExternalSymbol: {
+ case MachineOperand::MO_ExternalSymbol:
+ {
if (MI->getOpcode() == AMDIL::CALL) {
uint32_t funcNum = mAMI->getOrCreateFunctionID(
- std::string(MO.getSymbolName()));
+ std::string(MO.getSymbolName()));
mMFI->addCalledFunc(funcNum);
O << funcNum << " ; "<< MO.getSymbolName();
// This is where pointers should get resolved
@@ -458,32 +438,31 @@
}
}
break;
- case MachineOperand::MO_ConstantPoolIndex: {
+ case MachineOperand::MO_ConstantPoolIndex:
+ {
// Copies of constant buffers need to be done here
const AMDILKernel *tmp = mAMI->getKernel(mKernelName);
- O << "l" << mMFI->getIntLits(
- tmp->CPOffsets[MO.getIndex()].first);
+ O << "l" << mMFI->getLitIdx(
+ tmp->CPOffsets[MO.getIndex()].first);
}
break;
default:
- O << "<unknown operand type>";
- break;
+ O << "<unknown operand type>"; break;
}
}
-
void
AMDILAsmPrinter::printMemOperand(
const MachineInstr *MI,
int opNum,
OSTREAM_TYPE &O,
const char *Modifier
-)
+ )
{
const MachineOperand &MO = MI->getOperand (opNum);
if (opNum != 1) {
printOperand(MI, opNum
, O
- );
+ );
} else {
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -493,57 +472,34 @@
// FIXME: we need to remove all virtual register creation after register allocation.
// This is a work-around to make sure that the virtual register range does not
// clobber the physical register range.
- O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI, opNum);
+ O << "r" << ((MO.getReg() & 0x7FFFFFFF) + 2048) << getSwizzle(MI,
+ opNum);
} else if (opNum == 0
- && (opcode == AMDIL::SCRATCHSTORE
- ||opcode == AMDIL::SCRATCHSTORE64)) {
- O << getRegisterName(MO.getReg()) << ".x]" << getSwizzle(MI, opNum);
+ && isScratchInst(MI)) {
+ O << getRegisterName(MO.getReg()) << ".x]";
+ uint32_t reg = MI->getOperand(1).getReg();
+ // If we aren't the vector register, print the dst swizzle.
+ if (reg < AMDIL::R1 || reg > AMDIL::R1012) {
+ O << getSwizzle(MI, opNum);
+ }
} else {
O << getRegisterName(MO.getReg()) << getSwizzle(MI, opNum);
}
- } else {
+ }
+ else {
assert(0 && "Invalid Register type");
mMFI->addErrorMsg(
amd::CompilerErrorMessage[INTERNAL_ERROR]);
}
break;
case MachineOperand::MO_Immediate:
- case MachineOperand::MO_FPImmediate: {
- unsigned opcode = MI->getOpcode();
- if ((opNum == (int)(MI->getNumOperands() - 1))
- && ((opcode >= AMDIL::ATOM_A_ADD
- && opcode <= AMDIL::ATOM_R_XOR_B64)
- || opcode == AMDIL::SEMAPHORE_INIT
- || (opcode >= AMDIL::SCRATCHLOAD
- && opcode <= AMDIL::SCRATCHSTORE)
- || (opcode >= AMDIL::LDSLOAD && opcode <= AMDIL::LDSSTORE_i8)
- || (opcode >= AMDIL::GDSLOAD && opcode <= AMDIL::GDSSTORE)
- || (opcode >= AMDIL::UAVARENALOAD_i32
- && opcode <= AMDIL::UAVRAWSTORE_v4i32)
- || opcode == AMDIL::CBLOAD
- || opcode == AMDIL::CASE)
- ) {
- O << MO.getImm();
- } else if (opNum == 1 &&
- (isAppendInst(TM, MI)
- || isReadImageInst(TM, MI)
- || isImageTXLDInst(TM, MI)
- || opcode == AMDIL::CBLOAD)) {
- // We don't need to emit the 'l' so we just emit
- // the immediate as it stores the resource ID and
- // is not a true literal.
- O << MO.getImm();
- } else if (opNum == 0 &&
- (opcode == AMDIL::SEMAPHORE_INIT
- || opcode == AMDIL::SEMAPHORE_WAIT
- || opcode == AMDIL::SEMAPHORE_SIGNAL
- || isReadImageInst(TM, MI)
- || isWriteImageInst(TM, MI))) {
- O << MO.getImm();
- } else if (opNum == 3 && isReadImageInst(TM, MI)) {
+ case MachineOperand::MO_FPImmediate:
+ {
+ if (isSkippedLiteral(MI, opNum)) {
+ } else if (isBypassedLiteral(MI, opNum)) {
O << MO.getImm();
} else if (MO.isImm() || MO.isFPImm()) {
- O << "l" << MO.getImm();
+ O << "l" << MO.getImm() << getSwizzle(MI, opNum);
} else {
assert(0 && "Invalid literal/constant type");
mMFI->addErrorMsg(
@@ -551,21 +507,19 @@
}
}
break;
- case MachineOperand::MO_ConstantPoolIndex: {
+ case MachineOperand::MO_ConstantPoolIndex:
+ {
// Copies of constant buffers need to be done here
const AMDILKernel *tmp = mAMI->getKernel(mKernelName);
- O << "l" << mMFI->getIntLits(
- tmp->CPOffsets[MO.getIndex()].first);
+ O << "l" << mMFI->getLitIdx(
+ tmp->CPOffsets[MO.getIndex()].first);
}
break;
default:
- O << "<unknown operand type>";
- break;
+ O << "<unknown operand type>"; break;
};
}
}
-
-
const char*
AMDILAsmPrinter::getSwizzle(const MachineInstr *MI, int opNum)
{
@@ -578,7 +532,6 @@
return getDstSwizzle(swiz.bits.swizzle);
}
}
-
void
AMDILAsmPrinter::EmitStartOfAsmFile(Module &M)
{
@@ -587,7 +540,6 @@
const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
curTarget->setKernelManager(mMeta);
-
if (curTarget->device()->isSupported(
AMDILDeviceInfo::MacroDB)) {
// Since we are using the macro db, the first token must be a macro.
@@ -602,7 +554,6 @@
O << "mend\n";
}
-
// We need to increase the number of reserved literals for
// any literals we output manually instead of via the
// emitLiteral function. This function should never
@@ -669,7 +620,9 @@
}
bool
AMDILAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned int OpNo, unsigned int AsmVariant, const char *ExtraCode)
+ unsigned int OpNo,
+ unsigned int AsmVariant,
+ const char *ExtraCode)
{
assert(0 && "When is this function hit!");
return false;
@@ -681,13 +634,13 @@
}
void
AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
- const MachineBasicBlock *MBB) const
+ const MachineBasicBlock *MBB) const
{
assert(0 && "When is this function hit!");
}
void
AMDILAsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
- const MachineBasicBlock *MBB) const
+ const MachineBasicBlock *MBB) const
{
assert(0 && "When is this function hit!");
}
@@ -698,7 +651,6 @@
{
assert(0 && "When is this function hit!");
}
-
void
AMDILAsmPrinter::EmitFunctionBodyStart()
{
@@ -745,7 +697,8 @@
// Add the literals for the offsets and sizes of
// all the kernel constant arrays
- llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator cpb, cpe;
+ llvm::SmallVector<AMDILConstPtr,
+ DEFAULT_VEC_SLOTS>::const_iterator cpb, cpe;
for (cpb = tmp.constPtr.begin(), cpe = tmp.constPtr.end();
cpb != cpe; ++cpb) {
mMFI->addi32Literal(cpb->size);
@@ -819,7 +772,7 @@
{
const MachineConstantPool *MCP = MF->getConstantPool();
const std::vector<MachineConstantPoolEntry> &consts
- = MCP->getConstants();
+ = MCP->getConstants();
for (uint32_t x = 0, s = consts.size(); x < s; ++x) {
addCPoolLiteral(consts[x].Val.ConstVal);
}
@@ -831,11 +784,9 @@
return;
assert(0 && "When is this function hit!");
}
-
/// getDebugResourceLocation - Get resource id information encoded in
/// target flags.
-uint32_t AMDILAsmPrinter::getDebugResourceID(const MachineInstr *MI) const
-{
+uint32_t AMDILAsmPrinter::getDebugResourceID(const MachineInstr *MI) const {
const llvm::MachineOperand& opr = MI->getOperand(MI->getNumOperands() - 1);
assert(opr.isMetadata());
const MDNode *Var = opr.getMetadata();
@@ -843,16 +794,12 @@
uint32_t resourceID = mMeta->getUAVID(valOfVar);
return resourceID;
}
-
bool
-AMDILAsmPrinter::isMacroCall(const MachineInstr *MI)
-{
+AMDILAsmPrinter::isMacroCall(const MachineInstr *MI) {
return !strncmp(mTM->getInstrInfo()->getName(MI->getOpcode()), "MACRO", 5);
}
-
bool
-AMDILAsmPrinter::isMacroFunc(const MachineInstr *MI)
-{
+AMDILAsmPrinter::isMacroFunc(const MachineInstr *MI) {
if (MI->getOpcode() != AMDIL::CALL) {
return false;
}
@@ -882,11 +829,13 @@
} else if (reg >= AMDIL::Rzw1 && reg < AMDIL::SDP) {
return ((dst) ? ".__zw" : ".00zw");
} else {
- return "";
+ return "";
}
}
void
-AMDILAsmPrinter::emitMCallInst(const MachineInstr *MI, OSTREAM_TYPE &O, const char *name)
+AMDILAsmPrinter::emitMCallInst(const MachineInstr *MI,
+ OSTREAM_TYPE &O,
+ const char *name)
{
const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
int macronum = amd::MacroDBFindMacro(name);
@@ -915,7 +864,7 @@
.getRegisterInfo()->getRegClass(AMDIL::GPRF32RegClassID);
}
O << "\tmcall(" << macronum << ")(";
- int x ;
+ int x;
for (x = 0; x < numOut - 1; ++x) {
O << getRegisterName(trc->getRegister(x))
<< getRegSwizzle(trc->getRegister(x), true) << ", ";
@@ -930,16 +879,13 @@
<< getRegSwizzle(trc->getRegister(x), false) << ")";
O << " ;" << name <<"\n";
}
-
#if defined(LLVM_29) || defined(USE_APPLE)
void
-AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const
-{
+AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
}
#else
void
-AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const
-{
+AMDILAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
const TargetRegisterInfo *RI = TM.getRegisterInfo();
unsigned reg = MLoc.getReg();
unsigned baseReg = AMDIL::R1;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILAsmPrinter.h Wed Sep 12 12:43:34 2012
@@ -42,7 +42,6 @@
class MCInst;
class MCContext;
-
class LLVM_LIBRARY_VISIBILITY AMDILAsmPrinter : public AsmPrinter
{
public:
@@ -70,7 +69,7 @@
virtual void
printOperand(const MachineInstr *MI, int opNum
, OSTREAM_TYPE &O
- );
+ );
void
EmitGlobalVariable(const GlobalVariable *GV);
@@ -131,7 +130,7 @@
// the assembly format of the specified instruction
//
void
- printInstruction(const MachineInstr *MI , OSTREAM_TYPE &O); // autogenerated
+ printInstruction(const MachineInstr *MI, OSTREAM_TYPE &O); // autogenerated
const char *getRegisterName(unsigned RegNo);
@@ -161,7 +160,6 @@
bool
isMacroFunc(const MachineInstr *MI);
-
//
// @param MI Machine instruction to print swizzle for
// @param opNum the operand number to print swizzle for
@@ -179,7 +177,6 @@
/// EmitDwarfRegOp - Emit dwarf register operation
virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
-
protected:
//
// @param MI Machine instruction to emit the macro code for
@@ -188,12 +185,14 @@
// registers as the macro arguments.
//
virtual void
- emitMacroFunc(const MachineInstr *MI , OSTREAM_TYPE &O);
+ emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
// Flag whether to print out debug information
// or not.
bool mDebugMode;
+ void
+ printCopy(const MachineInstr *MI, OSTREAM_TYPE &O);
//
//
void
@@ -237,8 +236,6 @@
/// to print assembly version information in the metadata
bool mNeedVersion;
};
-
-
} // end of llvm namespace
#endif // _AMDIL_ASM_PRINTER_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBarrierDetect.cpp Wed Sep 12 12:43:34 2012
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "BarrierDetect"
-#ifdef DEBUG
+#define DEBUG_TYPE "barrierdetect"
+#if !defined(NDEBUG)
#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
#else
#define DEBUGME 0
@@ -45,8 +45,8 @@
{
class LLVM_LIBRARY_VISIBILITY AMDILBarrierDetect : public FunctionPass
{
- TargetMachine &TM;
- static char ID;
+TargetMachine &TM;
+static char ID;
public:
AMDILBarrierDetect(TargetMachine &TM, CodeGenOpt::Level OptLevel);
~AMDILBarrierDetect();
@@ -80,16 +80,13 @@
AMDILBarrierDetect::AMDILBarrierDetect(TargetMachine &TM,
CodeGenOpt::Level OptLevel)
- :
- FunctionPass(ID),
- TM(TM)
+ : FunctionPass(ID),
+ TM(TM)
{
}
-
AMDILBarrierDetect::~AMDILBarrierDetect()
{
}
-
bool AMDILBarrierDetect::detectBarrier(BasicBlock::iterator *BBI)
{
SmallVector<int64_t, DEFAULT_VEC_SLOTS>::iterator bIter;
@@ -100,8 +97,23 @@
if (!CI || !CI->getNumOperands()) {
return false;
}
+
const Value *funcVal = CI->getOperand(CI->getNumOperands() - 1);
- if (funcVal && strncmp(funcVal->getName().data(), "barrier", 7)) {
+
+ if (!funcVal) {
+ return false;
+ }
+
+ const StringRef& funcName = funcVal->getName();
+
+ if (funcName.startswith("__amdil_gws")) {
+ AMDILMachineFunctionInfo *MFI =
+ getAnalysis<MachineFunctionAnalysis>().getMF()
+ .getInfo<AMDILMachineFunctionInfo>();
+ MFI->addMetadata(";memory:gws");
+ return false;
+ } else if (!funcName.startswith("barrier") &&
+ !funcName.startswith("__amd_barrier")) {
return false;
}
@@ -130,9 +142,11 @@
MFI->addMetadata(";limitgroupsize");
MFI->setUsesLDS();
}
- const Value *V = inst->getOperand(inst->getNumOperands()-2);
+
+ const Value *V = inst->getOperand(inst->getNumOperands() - 2);
const ConstantInt *Cint = dyn_cast<ConstantInt>(V);
- Function *iF = dyn_cast<Function>(inst->getOperand(inst->getNumOperands()-1));
+ Function *iF = dyn_cast<Function>(inst->getOperand(inst->getNumOperands() - 1));
+
Module *M = iF->getParent();
bID = Cint->getSExtValue();
if (bID > 0) {
@@ -148,13 +162,12 @@
}
Function *nF =
dyn_cast<Function>(M->getOrInsertFunction(name, iF->getFunctionType()));
- inst->setOperand(inst->getNumOperands()-1, nF);
+ inst->setOperand(inst->getNumOperands() - 1, nF);
return false;
}
return false;
}
-
bool AMDILBarrierDetect::runOnFunction(Function &MF)
{
mChanged = false;
@@ -167,22 +180,18 @@
&AMDILBarrierDetect::detectBarrier), this));
return mChanged;
}
-
const char* AMDILBarrierDetect::getPassName() const
{
return "AMDIL Barrier Detect Pass";
}
-
bool AMDILBarrierDetect::doInitialization(Module &M)
{
return false;
}
-
bool AMDILBarrierDetect::doFinalization(Module &M)
{
return false;
}
-
void AMDILBarrierDetect::getAnalysisUsage(AnalysisUsage &AU) const
{
AU.addRequired<MachineFunctionAnalysis>();
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILBase.td Wed Sep 12 12:43:34 2012
@@ -49,20 +49,29 @@
"true",
"specify whether to not inline functions">;
-def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+def Feature64BitPtr : SubtargetFeature<"64bitptr",
"mIs64bit",
- "false",
+ "true",
"Specify if 64bit addressing should be used.">;
-def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+def Feature32on64BitPtr : SubtargetFeature<"64on32bitptr",
"mIs32on64bit",
- "false",
+ "true",
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
def FeatureDebug : SubtargetFeature<"debug",
"CapsOverride[AMDILDeviceInfo::Debug]",
"true",
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureMetadata30 : SubtargetFeature<"metadata30",
+ "mMetadata30",
+ "true",
+ "Enable generation of version 3.0 of the metadata spec">;
+
+def FeatureFlatAS : SubtargetFeature<"flatas", "mFlatAddress", "true",
+ "Enable flat address space override for global/local/scratch/constant memory.">;
+
+
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCFGStructurizer.cpp Wed Sep 12 12:43:34 2012
@@ -73,36 +73,33 @@
if (DEBUGME) errs() << "New instr: " << *i << "\n"
#define SHOWNEWBLK(b, msg) \
-if (DEBUGME) { \
- errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- errs() << "\n"; \
-}
+ if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+ }
#define SHOWBLK_DETAIL(b, msg) \
-if (DEBUGME) { \
- if (b) { \
- errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
- b->print(errs()); \
- errs() << "\n"; \
- } \
-}
+ if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+ }
#define INVALIDSCCNUM -1
#define INVALIDREGNUM 0
template<class LoopinfoT>
-void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS)
-{
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
iterEnd = LoopInfo.end();
iter != iterEnd; ++iter) {
(*iter)->print(OS, 0);
}
}
-
template<class NodeT>
-void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src)
-{
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
size_t sz = Src.size();
for (size_t i = 0; i < sz/2; ++i) {
NodeT *t = Src[i];
@@ -110,20 +107,20 @@
Src[sz - i - 1] = t;
}
}
-
} //end namespace llvmCFGStruct
-static MachineInstr *getLastBreakInstr(MachineBasicBlock *blk)
-{
- for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(); (iter != blk->rend()); ++iter) {
+static MachineInstr *getLastBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ (iter != blk->rend());
+ ++iter) {
MachineInstr *instr = &(*iter);
- if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+ if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZi32r) ||
+ (instr->getOpcode() == AMDIL::BREAK_LOGICALZi32r)) {
return instr;
}
}
return NULL;
}
-
//===----------------------------------------------------------------------===//
//
// MachinePostDominatorTree
@@ -135,11 +132,8 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/DominatorInternals.h"
-namespace llvm
-{
-
+namespace llvm {
extern void initializeMachinePostDominatorTreePass(PassRegistry&);
-FunctionPass *createMachinePostDominatorTreePass();
/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
/// to compute the a post-dominator tree.
@@ -147,12 +141,12 @@
struct MachinePostDominatorTree : public MachineFunctionPass {
static char ID; // Pass identification, replacement for typeid
DominatorTreeBase<MachineBasicBlock> *DT;
- MachinePostDominatorTree() : MachineFunctionPass(ID) {
+ MachinePostDominatorTree() : MachineFunctionPass(ID)
+ {
initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
// postdominator
}
-
~MachinePostDominatorTree();
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -161,46 +155,36 @@
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
inline const std::vector<MachineBasicBlock *> &getRoots() const {
return DT->getRoots();
}
-
inline MachineDomTreeNode *getRootNode() const {
return DT->getRootNode();
}
-
inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
return DT->getNode(BB);
}
-
inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
return DT->getNode(BB);
}
-
inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
return DT->dominates(A, B);
}
-
inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
return DT->dominates(A, B);
}
-
inline bool
properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
return DT->properlyDominates(A, B);
}
-
inline bool
properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
return DT->properlyDominates(A, B);
}
-
inline MachineBasicBlock *
findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
return DT->findNearestCommonDominator(A, B);
}
-
virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
DT->print(OS);
}
@@ -210,28 +194,22 @@
char MachinePostDominatorTree::ID = 0;
INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
"MachinePostDominator Tree Construction",
- true, true)
+ true, true);
-FunctionPass *llvm::createMachinePostDominatorTreePass()
-{
+FunctionPass *llvm::createMachinePostDominatorTreePass() {
return new MachinePostDominatorTree();
}
-
//const PassInfo *const llvm::MachinePostDominatorsID
//= &machinePostDominatorTreePass;
-bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F)
-{
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
DT->recalculate(F);
//DEBUG(DT->dump());
return false;
}
-
-MachinePostDominatorTree::~MachinePostDominatorTree()
-{
+MachinePostDominatorTree::~MachinePostDominatorTree() {
delete DT;
}
-
//===----------------------------------------------------------------------===//
//
// supporting data structure for CFGStructurizer
@@ -245,38 +223,37 @@
};
template <class InstrT>
-class BlockInformation
-{
+class BlockInformation {
public:
bool isRetired;
- int sccNum;
+ int sccNum;
//SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
//Instructions defining the corresponding successor.
- BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {
+ }
};
template <class BlockT, class InstrT, class RegiT>
-class LandInformation
-{
+class LandInformation {
public:
BlockT *landBlk;
std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
- //WHILELOOP(thisloop) init before entering
- //thisloop.
+ //WHILELOOP(thisloop) init before entering
+ //thisloop.
std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
- //WHILELOOP(thisloop) init after entering
- //thisloop.
+ //WHILELOOP(thisloop) init after entering
+ //thisloop.
std::set<RegiT> endbranchInitRegs; //Init after entering this loop, at loop
- //land block, branch cond on this reg.
+ //land block, branch cond on this reg.
std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
- //endif" after ENDLOOP(thisloop) break
- //outerLoopOf(thisLoop).
+ //endif" after ENDLOOP(thisloop) break
+ //outerLoopOf(thisLoop).
std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
- //endif" after ENDLOOP(thisloop) continue on
- //outerLoopOf(thisLoop).
- LandInformation() : landBlk(NULL) {}
+ //endif" after ENDLOOP(thisloop) continue on
+ //outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {
+ }
};
-
} //end of namespace llvmCFGStruct
//===----------------------------------------------------------------------===//
@@ -288,12 +265,11 @@
namespace llvmCFGStruct
{
// Stores the list of defs and uses of a virtual register
-class DefUseList
-{
- enum {
- FLAG_DEF = 0,
- FLAG_USE = 1
- };
+class DefUseList {
+enum {
+ FLAG_DEF = 0,
+ FLAG_USE = 1
+};
public:
// struct that represents a single def or use
@@ -307,7 +283,8 @@
return _flag == FLAG_USE;
}
DefOrUseT(unsigned slotIndex, unsigned flag)
- : _slotIndex(slotIndex), _flag(flag) {}
+ : _slotIndex(slotIndex), _flag(flag) {
+ }
};
private:
@@ -319,7 +296,8 @@
DefUseVecT _defUses;
- DefUseList() : _defUses() {}
+ DefUseList() : _defUses() {
+ }
void addDef(unsigned slotIndex) {
_defUses.push_back(DefOrUseT(slotIndex, FLAG_DEF));
}
@@ -352,6 +330,7 @@
assert(it != e && "no def/use");
const_iterator pre = it;
for (++it; it != e; ++it) {
+ const DefOrUseT& defOrUse = *it;
if ((*pre)._slotIndex > (*it)._slotIndex) {
return false;
}
@@ -359,7 +338,6 @@
}
return true;
}
-
void DefUseList::dump() const
{
for (const_iterator it = begin(), e = end(); it != e; ++it) {
@@ -368,23 +346,22 @@
errs() << " " << defOrUse._slotIndex << " " << str << "\n";
}
}
-
// a live interval
-class LiveInterval
-{
- enum {
- UndefinedSlotIndex = -1
- };
- unsigned _vreg;
- int _startSlotIndex;
- int _endSlotIndex;
+class LiveInterval {
+enum {
+ UndefinedSlotIndex = -1
+};
+unsigned _vreg;
+int _startSlotIndex;
+int _endSlotIndex;
public:
LiveInterval(unsigned vreg)
: _vreg(vreg),
_startSlotIndex(UndefinedSlotIndex),
_endSlotIndex(UndefinedSlotIndex)
- {}
+ {
+ }
bool hasStart() const {
return _startSlotIndex != UndefinedSlotIndex;
}
@@ -409,9 +386,8 @@
};
// a list of live intervals
-class LiveIntervals
-{
- typedef SmallVector<LiveInterval, 16> IntervalVecType;
+class LiveIntervals {
+typedef SmallVector<LiveInterval, 16> IntervalVecType;
public:
typedef IntervalVecType::iterator iterator;
@@ -425,7 +401,8 @@
iterator findIntervalImpl(unsigned vreg);
public:
- LiveIntervals(bool sorted) : _intervals(), _sorted(sorted) {}
+ LiveIntervals(bool sorted) : _intervals(), _sorted(sorted) {
+ }
LiveInterval* findInterval(unsigned vreg) {
iterator it = findIntervalImpl(vreg);
if (it == _intervals.end()) {
@@ -452,7 +429,6 @@
}
insertIntervalSorted(interval);
}
-
void removeInterval(unsigned vreg);
iterator removeInterval(iterator it) {
return _intervals.erase(it);
@@ -484,7 +460,6 @@
}
return it;
}
-
void LiveIntervals::insertIntervalSorted(LiveInterval& interval)
{
iterator it = _intervals.begin();
@@ -496,14 +471,12 @@
}
_intervals.insert(it, interval);
}
-
void LiveIntervals::removeInterval(unsigned vreg)
{
iterator it = findIntervalImpl(vreg);
assert(it != _intervals.end() && "interval not found");
_intervals.erase(it);
}
-
bool LiveIntervals::isSortedByStart() const
{
const_iterator it = _intervals.begin();
@@ -521,7 +494,6 @@
}
return true;
}
-
void LiveIntervals::dump() const
{
errs() << "Intervals:\n";
@@ -534,7 +506,6 @@
<< " end " << interval.end() << "\n";
}
}
-
// Trivial linear scan register allocator to allocate physical registers
// for registers requested during CFGStructurizer pass.
// Since register allocator has already been run before this pass, we have
@@ -542,10 +513,10 @@
// for registers requested during this pass.
class TrivialRegAlloc
{
- typedef SmallVector<LiveIntervals, 2> IntervalsVecT;
- typedef std::map<unsigned, unsigned> RegMapT;
- typedef std::set<unsigned> RegSetT;
- typedef std::map<unsigned, DefUseList*> VRegDefUseMapT;
+typedef SmallVector<LiveIntervals, 2> IntervalsVecT;
+typedef std::map<unsigned, unsigned> RegMapT;
+typedef std::set<unsigned> RegSetT;
+typedef std::map<unsigned, DefUseList*> VRegDefUseMapT;
private:
// data structures passed in to this class
@@ -595,7 +566,6 @@
{
assert(_regClass.getID() == AMDIL::GPRI32RegClassID && "unimplemented");
}
-
TrivialRegAlloc::~TrivialRegAlloc()
{
for (VRegDefUseMapT::iterator I = _vregDefUseMap.begin(),
@@ -604,7 +574,6 @@
delete I->second;
}
}
-
// find all physical registers that are still available after the global
// register allocator
static void findAvailPhysRegs(MachineFunction& func,
@@ -625,7 +594,6 @@
}
}
}
-
// initialize the register set with remaining physical registers that are still
// available and the set of physical registers reserved for CFGStructurizer
void TrivialRegAlloc::initRegSet()
@@ -644,22 +612,18 @@
errs() << "\n";
}
}
-
// compute live intervals for the virtual registers created during
// CFGStructurizer pass
-void TrivialRegAlloc::computeIntervals()
-{
+void TrivialRegAlloc::computeIntervals() {
MachineBasicBlock* entryBlk
- = GraphTraits<MachineFunction*>::nodes_begin(&_func);
+ = GraphTraits<MachineFunction*>::nodes_begin(&_func);
unsigned slotIndex = 0;
- if (DEBUGME) errs() << "start computeIntervals()\n";
// there is only one block now in the function
for (MachineBasicBlock::iterator iter = entryBlk->begin(),
iterEnd = entryBlk->end();
iter != iterEnd;
++iter) {
MachineInstr* inst = iter;
- if (DEBUGME) errs() << *inst;
for (unsigned i = 0; i < inst->getNumOperands(); ++i) {
MachineOperand& oper = inst->getOperand(i);
if (!oper.isReg() || !oper.getReg()) {
@@ -670,7 +634,6 @@
if (!_vregs.count(vreg)) {
continue;
}
- if (DEBUGME) errs() << " oper " << oper << " vreg " << TargetRegisterInfo::virtReg2Index(vreg) << "\n";
// add to vreg's def/use list
DefUseList*& defUses = _vregDefUseMap[vreg];
LiveInterval* interval = _intervals.findInterval(vreg);
@@ -688,10 +651,12 @@
errs() << "interval for vreg "
<< TargetRegisterInfo::virtReg2Index(vreg)
<< " start at " << slotIndex << "\n";
- } else {
+ }
+ else {
assert(slotIndex > interval->start() && "sanity");
}
- } else {
+ }
+ else {
assert(defUses && "use before def");
defUses->addUse(slotIndex);
assert(interval && "use before def");
@@ -726,7 +691,6 @@
}
#endif
}
-
// pick a physical register that is not in use
unsigned TrivialRegAlloc::getPhysicalRegister()
{
@@ -741,7 +705,6 @@
abort();
return 0;
}
-
// allocate a physical register for the live interval
void TrivialRegAlloc::allocateRegisterFor(LiveInterval& interval)
{
@@ -754,7 +717,6 @@
<< TargetRegisterInfo::virtReg2Index(vreg) << "\n";
// _func->getRegInfo().setPhysRegUsed(tempReg);
}
-
// release physical register allocated for the interval
void TrivialRegAlloc::releaseRegisterFor(const LiveInterval& interval)
{
@@ -768,22 +730,21 @@
assert(i < _regSet.size() && "invalid physical regsiter");
_regInUse[i] = 0;
}
-
// remove out of active intervals list if an interval becomes inactive
void TrivialRegAlloc::handleActiveIntervals(unsigned pos)
{
for (LiveIntervals::iterator it = _activeIntervals.begin();
- it != _activeIntervals.end();) {
+ it != _activeIntervals.end(); ) {
LiveInterval& interval = *it;
if (pos > interval.end()) {
releaseRegisterFor(interval);
it = _activeIntervals.removeInterval(it);
- } else {
+ }
+ else {
++it;
}
}
}
-
// allocate physical registers for each live interval in the interval list
void TrivialRegAlloc::allocateRegisters()
{
@@ -801,12 +762,10 @@
allocateRegisterFor(interval);
}
}
-
// rewrite the machine instructions to use the physical registers allocated
-void TrivialRegAlloc::rewrite()
-{
+void TrivialRegAlloc::rewrite() {
MachineBasicBlock* entryBlk
- = GraphTraits<MachineFunction*>::nodes_begin(&_func);
+ = GraphTraits<MachineFunction*>::nodes_begin(&_func);
// there is only one block now in the function
for (MachineBasicBlock::iterator iter = entryBlk->begin(),
iterEnd = entryBlk->end();
@@ -829,7 +788,6 @@
}
}
}
-
// the main driver of this register allocator
void TrivialRegAlloc::run()
{
@@ -838,7 +796,6 @@
allocateRegisters();
rewrite();
}
-
//===----------------------------------------------------------------------===//
//
// CFGStructurizer
@@ -857,29 +814,29 @@
} PathToKind;
public:
- typedef typename PassT::InstructionType InstrT;
- typedef typename PassT::FunctionType FuncT;
- typedef typename PassT::DominatortreeType DomTreeT;
- typedef typename PassT::PostDominatortreeType PostDomTreeT;
- typedef typename PassT::DomTreeNodeType DomTreeNodeT;
- typedef typename PassT::LoopinfoType LoopInfoT;
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
typedef GraphTraits<FuncT *> FuncGTraits;
//typedef FuncGTraits::nodes_iterator BlockIterator;
- typedef typename FuncT::iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
- typedef typename FuncGTraits::NodeType BlockT;
+ typedef typename FuncGTraits::NodeType BlockT;
typedef GraphTraits<BlockT *> BlockGTraits;
typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
//typedef BlockGTraits::succ_iterator InstructionIterator;
- typedef typename BlockT::iterator InstrIterator;
+ typedef typename BlockT::iterator InstrIterator;
typedef CFGStructTraits<PassT> CFGTraits;
typedef BlockInformation<InstrT> BlockInfo;
typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
- typedef int RegiT;
- typedef typename PassT::LoopType LoopT;
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
//landing info for loop break
@@ -1008,21 +965,16 @@
}; //template class CFGStructurizer
template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
- : domTree(NULL), postDomTree(NULL), loopInfo(NULL)
-{
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
}
-
-template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer()
-{
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
E = blockInfoMap.end(); I != E; ++I) {
delete I->second;
}
}
-
template<class PassT>
-bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass)
-{
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass) {
passRep = &pass;
funcRep = &func;
@@ -1107,10 +1059,8 @@
return changed;
} //CFGStructurizer::prepare
-
template<class PassT>
-bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass)
-{
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass) {
passRep = &pass;
funcRep = &func;
@@ -1183,7 +1133,7 @@
sccBeginIter = iterBlk;
BlockT *sccBeginBlk = NULL;
int sccNumBlk = 0; // The number of active blocks, init to a
- // maximum possible number.
+ // maximum possible number.
int sccNumIter; // Number of iteration in this SCC.
while (iterBlk != iterBlkEnd) {
@@ -1257,7 +1207,7 @@
}
} else {
int newnumRemainedBlk
- = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
// consider cloned blocks ??
if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
makeProgress = true;
@@ -1313,12 +1263,10 @@
return true;
} //CFGStructurizer::run
-
/// Print the ordered Blocks.
///
template<class PassT>
-void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os)
-{
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
size_t i = 0;
for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
@@ -1333,11 +1281,9 @@
}
}
} //printOrderedBlocks
-
/// Compute the reversed DFS post order of Blocks
///
-template<class PassT> void CFGStructurizer<PassT>::orderBlocks()
-{
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
int sccNum = 0;
BlockT *bb;
for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
@@ -1363,11 +1309,10 @@
}
} //end of for
} //orderBlocks
-
/// Compute the reversed DFS post order of Blocks
///
-template<class PassT> void CFGStructurizer<PassT>::processAddedToTraversalBlocks()
-{
+template<class PassT> void CFGStructurizer<PassT>::
+processAddedToTraversalBlocks() {
typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
iterBlk = addedToTraversalBlks.begin();
typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
@@ -1384,7 +1329,6 @@
++iterBlk;
}
} //CFGStructurizer<PassT>::processAddedToTraversalBlocks
-
template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk)
{
int numMatch = 0;
@@ -1405,10 +1349,8 @@
return numMatch;
} //patternMatch
-
template<class PassT>
-int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk)
-{
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
int numMatch = 0;
numMatch += serialPatternMatch(curBlk);
numMatch += ifPatternMatch(curBlk);
@@ -1416,11 +1358,9 @@
numMatch += loopendPatternMatch(curBlk);
numMatch += loopPatternMatch(curBlk);
return numMatch;
-}//patternMatchGroup
-
+} //patternMatchGroup
template<class PassT>
-int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk)
-{
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
if (curBlk->succ_size() != 1) {
return 0;
}
@@ -1434,10 +1374,8 @@
++numSerialPatternMatch;
return 1;
} //serialPatternMatch
-
template<class PassT>
-int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk)
-{
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
//two edges
if (curBlk->succ_size() != 2) {
return 0;
@@ -1509,16 +1447,12 @@
return 1 + cloned;
} //ifPatternMatch
-
template<class PassT>
-int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk)
-{
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
return 0;
} //switchPatternMatch
-
template<class PassT>
-int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk)
-{
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
LoopT *loopRep = loopInfo->getLoopFor(curBlk);
typename std::vector<LoopT *> nestedLoops;
while (loopRep) {
@@ -1556,10 +1490,8 @@
return num;
} //loopendPatternMatch
-
template<class PassT>
-int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk)
-{
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
if (curBlk->succ_size() != 0) {
return 0;
}
@@ -1583,11 +1515,9 @@
return numLoop;
} //loopPatternMatch
-
template<class PassT>
int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
- BlockT *loopHeader)
-{
+ BlockT *loopHeader) {
BlockTSmallerVector exitingBlks;
loopRep->getExitingBlocks(exitingBlks);
@@ -1623,7 +1553,8 @@
int numCloned = 0;
int numSerial = 0;
- if (exitBlkSet.size() == 1) {
+ if (exitBlkSet.size() == 1)
+ {
exitLandBlk = *exitBlkSet.begin();
} else {
exitLandBlk = findNearestCommonPostDom(exitBlkSet);
@@ -1668,15 +1599,15 @@
if (exitLandBlk == parentLoopHeader &&
(exitLandBlk = relocateLoopcontBlock(parentLoopRep,
- loopRep,
- exitBlkSet,
- exitLandBlk)) != NULL) {
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
if (DEBUGME) {
errs() << "relocateLoopcontBlock success\n";
}
} else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
- exitingBlks,
- exitBlks)) != NULL) {
+ exitingBlks,
+ exitBlks)) != NULL) {
if (DEBUGME) {
errs() << "insertEndbranchBlock success\n";
}
@@ -1687,8 +1618,8 @@
return -1;
}
} else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
- exitingBlks,
- exitBlks)) != NULL) {
+ exitingBlks,
+ exitBlks)) != NULL) {
//current addLoopEndbranchBlock always does something and return non-NULL
if (DEBUGME) {
errs() << "insertEndbranchBlock success\n";
@@ -1762,11 +1693,9 @@
numClonedBlock += numCloned;
return numBreak + numSerial + numCloned;
} //loopbreakPatternMatch
-
template<class PassT>
int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
- BlockT *loopHeader)
-{
+ BlockT *loopHeader) {
int numCont = 0;
SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
for (typename InvBlockGTraits::ChildIteratorType iter =
@@ -1792,12 +1721,9 @@
return numCont;
} //loopcontPatternMatch
-
-
template<class PassT>
bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
- BlockT *src2Blk)
-{
+ BlockT *src2Blk) {
// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
// same loop with LoopLandInfo without explicitly keeping track of
// loopContBlks and loopBreakBlks, this is a method to get the information.
@@ -1818,12 +1744,10 @@
}
return false;
} //isSameloopDetachedContbreak
-
template<class PassT>
int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk)
-{
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
if (num == 0) {
if (DEBUGME) {
@@ -1857,16 +1781,13 @@
}
}
} //check NULL
-
}
return num;
}
-
template<class PassT>
int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk)
-{
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
int num = 0;
BlockT *downBlk;
@@ -1885,7 +1806,7 @@
errs() << "check down = BB" << downBlk->getNumber();
}
- if (//postDomTree->dominates(downBlk, falseBlk) &&
+ if ( //postDomTree->dominates(downBlk, falseBlk) &&
singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
if (DEBUGME) {
errs() << " working\n";
@@ -1910,14 +1831,12 @@
return num;
} //handleJumpintoIf
-
template<class PassT>
void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT *landBlk,
- bool detail)
-{
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
errs() << "head = BB" << headBlk->getNumber()
<< " size = " << headBlk->size();
if (detail) {
@@ -1956,13 +1875,11 @@
errs() << "\n";
} //showImproveSimpleJumpintoIf
-
template<class PassT>
int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT **plandBlk)
-{
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
bool migrateTrue = false;
bool migrateFalse = false;
@@ -2042,7 +1959,7 @@
headBlk->addSuccessor(landBlk);
}
- numNewBlk ++;
+ numNewBlk++;
}
bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
@@ -2050,7 +1967,7 @@
//insert AMDIL::ENDIF to avoid special case "input landBlk == NULL"
typename BlockT::iterator insertPos =
CFGTraits::getInstrPos
- (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDIL::ENDIF, passRep));
if (landBlkHasOtherPred) {
unsigned immReg = getRegister(&AMDIL::GPRI32RegClass);
@@ -2059,11 +1976,11 @@
CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
initReg, immReg);
CFGTraits::insertCondBranchBefore(landBlk, insertPos,
- AMDIL::IF_LOGICALZ_i32, passRep,
+ AMDIL::IF_LOGICALZi32r, passRep,
cmpResReg, DebugLoc());
}
- CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZ_i32,
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDIL::IF_LOGICALNZi32r,
passRep, initReg, DebugLoc());
if (migrateTrue) {
@@ -2109,7 +2026,6 @@
return numNewBlk;
} //improveSimpleJumpintoIf
-
// Since we are after the register allocator, we don't want to use virtual
// registers as it is possible that we can get a virtual register that is
// passed the 65K limit of IL text format. So instead we serach through the
@@ -2128,14 +2044,12 @@
<< TargetRegisterInfo::virtReg2Index(reg) << "\n";
return reg;
}
-
template<class PassT>
void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
- LoopT *exitingLoop,
- BlockT *exitBlk,
- LoopT *exitLoop,
- BlockT *landBlk)
-{
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
if (DEBUGME) {
errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
<< " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
@@ -2154,15 +2068,12 @@
}
mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
-
} //handleLoopbreak
-
template<class PassT>
void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
- LoopT *contingLoop,
- BlockT *contBlk,
- LoopT *contLoop)
-{
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
if (DEBUGME) {
errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
<< " header = BB" << contBlk->getNumber() << "\n";
@@ -2188,10 +2099,8 @@
settleLoopcontBlock(contingBlk, contBlk, initReg);
//contingBlk->removeSuccessor(loopHeader);
} //handleLoopcontBlock
-
template<class PassT>
-void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk)
-{
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
if (DEBUGME) {
errs() << "serialPattern BB" << dstBlk->getNumber()
<< " <= BB" << srcBlk->getNumber() << "\n";
@@ -2205,14 +2114,12 @@
removeSuccessor(srcBlk);
retireBlock(dstBlk, srcBlk);
} //mergeSerialBlock
-
template<class PassT>
void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
- BlockT *curBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT *landBlk)
-{
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
if (DEBUGME) {
errs() << "ifPattern BB" << curBlk->getNumber();
errs() << "{ ";
@@ -2260,7 +2167,8 @@
branchDL);
if (trueBlk) {
- curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(
+ trueBlk), trueBlk->end());
curBlk->removeSuccessor(trueBlk);
if (landBlk && trueBlk->succ_size()!=0) {
trueBlk->removeSuccessor(landBlk);
@@ -2290,13 +2198,10 @@
if (landBlk && trueBlk && falseBlk) {
curBlk->addSuccessor(landBlk);
}
-
} //mergeIfthenelseBlock
-
template<class PassT>
void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
- LoopLandInfo *loopLand)
-{
+ LoopLandInfo *loopLand) {
BlockT *landBlk = loopLand->landBlk;
if (DEBUGME) {
@@ -2319,25 +2224,29 @@
}
/* we last inserterd the DebugLoc in the
- * BREAK_LOGICALZ_i32 or AMDIL::BREAK_LOGICALNZ statement in the current dstBlk.
+ * BREAK_LOGICALZi32r or AMDIL::BREAK_LOGICALNZ statement in the current dstBlk.
* search for the DebugLoc in the that statement.
* if not found, we have to insert the empty/default DebugLoc */
InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
- DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+ DebugLoc DLBreak =
+ (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
// fogbugz #7310: work-around discussed with Uri regarding do-while loops:
// in case the the WHILELOOP line number is greater than do.body line numbers,
// take the do.body line number instead.
MachineBasicBlock::iterator iter = dstBlk->begin();
MachineInstr *instrDoBody = &(*iter);
- DebugLoc DLBreakDoBody = (instrDoBody) ? instrDoBody->getDebugLoc() : DebugLoc();
- DebugLoc DLBreakMin = (DLBreak.getLine() < DLBreakDoBody.getLine()) ? DLBreak : DLBreakDoBody;
+ DebugLoc DLBreakDoBody =
+ (instrDoBody) ? instrDoBody->getDebugLoc() : DebugLoc();
+ DebugLoc DLBreakMin =
+ (DLBreak.getLine() < DLBreakDoBody.getLine()) ? DLBreak : DLBreakDoBody;
CFGTraits::insertInstrBefore(dstBlk, AMDIL::WHILELOOP, passRep, DLBreakMin);
// Loop breakInitRegs are init before entering the loop.
for (typename std::set<RegiT>::const_iterator iter =
loopLand->breakInitRegs.begin(),
- iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
+ {
CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
}
@@ -2345,7 +2254,8 @@
* search for the DebugLoc in the continue statement.
* if not found, we have to insert the empty/default DebugLoc */
InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
- DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+ DebugLoc DLContinue =
+ (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
CFGTraits::insertInstrEnd(dstBlk, AMDIL::ENDLOOP, passRep, DLContinue);
// Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
@@ -2353,7 +2263,7 @@
for (typename std::set<RegiT>::const_iterator iter =
loopLand->breakOnRegs.begin(),
iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZ_i32, passRep,
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::BREAK_LOGICALNZi32r, passRep,
*iter);
}
@@ -2361,7 +2271,7 @@
// loop.
for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZ_i32,
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDIL::CONTINUE_LOGICALNZi32r,
passRep, *iter);
}
@@ -2375,13 +2285,11 @@
removeSuccessor(landBlk);
retireBlock(dstBlk, landBlk);
} //mergeLooplandBlock
-
template<class PassT>
void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
- BlockT *exitBlk,
- BlockT *exitLandBlk,
- RegiT setReg)
-{
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
if (DEBUGME) {
errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
<< " exit = BB" << exitBlk->getNumber()
@@ -2441,14 +2349,11 @@
exitBlk->removeSuccessor(exitLandBlk);
retireBlock(exitingBlk, exitBlk);
}
-
} //mergeLoopbreakBlock
-
template<class PassT>
void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
- BlockT *contBlk,
- RegiT setReg)
-{
+ BlockT *contBlk,
+ RegiT setReg) {
if (DEBUGME) {
errs() << "settleLoopcontBlock conting = BB"
<< contingBlk->getNumber()
@@ -2477,7 +2382,8 @@
bool useContinueLogical =
(setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
- if (useContinueLogical == false) {
+ if (useContinueLogical == false)
+ {
int branchOpcode =
trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
: CFGTraits::getBranchZeroOpcode(oldOpcode);
@@ -2508,8 +2414,8 @@
branchInstr->eraseFromParent();
} else {
/* if we've arrived here then we've already erased the branch instruction
- * travel back up the basic block to see the last reference of our debug location
- * we've just inserted that reference here so it should be representative */
+ * travel back up the basic block to see the last reference of our debug location
+ * we've just inserted that reference here so it should be representative */
if (setReg != INVALIDREGNUM) {
CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
// insertEnd to ensure phi-moves, if exist, go before the continue-instr.
@@ -2521,9 +2427,7 @@
CFGTraits::getLastDebugLocInBB(contingBlk));
}
} //else
-
} //settleLoopcontBlock
-
// BBs in exitBlkSet are determined as in break-path for loopRep,
// before we can put code for BBs as inside loop-body for loopRep
// check whether those BBs are determined as cont-BB for parentLoopRep
@@ -2536,15 +2440,13 @@
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
- LoopT *loopRep,
- std::set<BlockT *> &exitBlkSet,
- BlockT *exitLandBlk)
-{
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
std::set<BlockT *> endBlkSet;
// BlockT *parentLoopHead = parentLoopRep->getHeader();
-
for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
iterEnd = exitBlkSet.end();
iter != iterEnd; ++iter) {
@@ -2579,8 +2481,6 @@
return newBlk;
} //relocateLoopcontBlock
-
-
// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
// LoopLandBlock. This BB branch on the loop endBranchInit register to the
// pathes corresponding to the loop exiting branches.
@@ -2588,9 +2488,8 @@
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
- BlockTSmallerVector &exitingBlks,
- BlockTSmallerVector &exitBlks)
-{
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
RegiT endBranchReg = getRegister(&AMDIL::GPRI32RegClass);
@@ -2654,20 +2553,20 @@
DebugLoc DL;
RegiT preValReg = getRegister(&AMDIL::GPRI32RegClass);
MachineInstr* preValInst
- = BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONST_i32), preValReg)
- .addImm(i - 1); //preVal
+ = BuildMI(preBranchBlk, DL, tii->get(AMDIL::LOADCONSTi32), preValReg)
+ .addImm(i - 1); //preVal
SHOWNEWINSTR(preValInst);
// condResReg = (endBranchReg == preValReg)
RegiT condResReg = getRegister(&AMDIL::GPRI32RegClass);
MachineInstr* cmpInst
- = BuildMI(preBranchBlk, DL, tii->get(AMDIL::IEQ), condResReg)
- .addReg(endBranchReg).addReg(preValReg);
+ = BuildMI(preBranchBlk, DL, tii->get(AMDIL::EQi32rr), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
SHOWNEWINSTR(cmpInst);
MachineInstr* condBranchInst
- = BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCH_COND_i32))
- .addMBB(preExitBlk).addReg(condResReg);
+ = BuildMI(preBranchBlk, DL, tii->get(AMDIL::BRANCHi32br))
+ .addMBB(preExitBlk).addReg(condResReg);
SHOWNEWINSTR(condBranchInst);
preBranchBlk->addSuccessor(preExitBlk);
@@ -2677,17 +2576,14 @@
preExitingBlk = curExitingBlk;
preExitBlk = curExitBlk;
preBranchBlk = curBranchBlk;
-
} //end for 1 .. n blocks
return newLandBlk;
} //addLoopEndbranchBlock
-
template<class PassT>
typename CFGStructurizer<PassT>::PathToKind
CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
- bool allowSideEntry)
-{
+ bool allowSideEntry) {
assert(dstBlk);
if (srcBlk == dstBlk) {
@@ -2711,15 +2607,13 @@
return Not_SinglePath;
} //singlePathTo
-
// If there is a single path from srcBlk to dstBlk, return the last block before
// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
// last block in the path Otherwise, return NULL
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
- bool allowSideEntry)
-{
+ bool allowSideEntry) {
//assert(dstBlk);
if (srcBlk == dstBlk) {
@@ -2748,13 +2642,10 @@
}
return NULL;
-
} //singlePathEnd
-
template<class PassT>
int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
- BlockT *dstBlk)
-{
+ BlockT *dstBlk) {
int cloned = 0;
assert(preBlk->isSuccessor(srcBlk));
while (srcBlk && srcBlk != dstBlk) {
@@ -2774,12 +2665,10 @@
return cloned;
} //cloneOnSideEntryTo
-
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
- BlockT *predBlk)
-{
+ BlockT *predBlk) {
assert(predBlk->isSuccessor(curBlk) &&
"succBlk is not a prececessor of curBlk");
@@ -2804,12 +2693,10 @@
return cloneBlk;
} //cloneBlockForPredecessor
-
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
- BlockT *exitingBlk)
-{
+ BlockT *exitingBlk) {
BlockT *exitBlk = NULL;
for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
@@ -2826,23 +2713,21 @@
return exitBlk;
} //exitingBlock2ExitBlock
-
template<class PassT>
void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
- BlockT *dstBlk,
- InstrIterator insertPos)
-{
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
InstrIterator spliceEnd;
//look for the input branchinstr, not the AMDIL branchinstr
InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
if (branchInstr == NULL) {
if (DEBUGME) {
- errs() << "migrateInstruction don't see branch instr\n" ;
+ errs() << "migrateInstruction don't see branch instr\n";
}
spliceEnd = srcBlk->end();
} else {
if (DEBUGME) {
- errs() << "migrateInstruction see branch instr\n" ;
+ errs() << "migrateInstruction see branch instr\n";
branchInstr->dump();
}
spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
@@ -2860,7 +2745,6 @@
<< "srcSize = " << srcBlk->size() << "\n";
}
} //migrateInstruction
-
// normalizeInfiniteLoopExit change
// B1:
// uncond_br LoopHeader
@@ -2872,8 +2756,7 @@
//
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep)
-{
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
BlockT *loopHeader;
BlockT *loopLatch;
loopHeader = LoopRep->getHeader();
@@ -2893,7 +2776,7 @@
unsigned immReg = getRegister(&AMDIL::GPRI32RegClass);
CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
InstrT *newInstr =
- CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCH_COND_i32,
+ CFGTraits::insertInstrBefore(insertPos, AMDIL::BRANCHi32br,
passRep);
MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
SHOWNEWINSTR(newInstr);
@@ -2904,10 +2787,8 @@
return dummyExitBlk;
} //normalizeInfiniteLoopExit
-
template<class PassT>
-void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk)
-{
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
InstrT *branchInstr;
// I saw two unconditional branch in one basic block in example
@@ -2915,16 +2796,14 @@
while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
&& CFGTraits::isUncondBranch(branchInstr)) {
if (DEBUGME) {
- errs() << "Removing unconditional branch instruction" ;
+ errs() << "Removing unconditional branch instruction";
branchInstr->dump();
}
branchInstr->eraseFromParent();
}
} //removeUnconditionalBranch
-
template<class PassT>
-void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk)
-{
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
if (srcBlk->succ_size() == 2) {
BlockT *blk1 = *srcBlk->succ_begin();
BlockT *blk2 = *(srcBlk->succ_begin()+1);
@@ -2933,7 +2812,7 @@
InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
if (DEBUGME) {
- errs() << "Removing unneeded conditional branch instruction" ;
+ errs() << "Removing unneeded conditional branch instruction";
branchInstr->dump();
}
branchInstr->eraseFromParent();
@@ -2942,11 +2821,10 @@
}
}
} //removeRedundantConditionalBranch
-
template<class PassT>
void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
- DEFAULT_VEC_SLOTS> &retBlks)
-{
+ DEFAULT_VEC_SLOTS> &
+ retBlks) {
BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
funcRep->push_back(dummyExitBlk); //insert to function
CFGTraits::insertInstrEnd(dummyExitBlk, AMDIL::RETURN, passRep);
@@ -2979,18 +2857,14 @@
SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
} //addDummyExitBlock
-
template<class PassT>
-void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk)
-{
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
while (srcBlk->succ_size()) {
srcBlk->removeSuccessor(*srcBlk->succ_begin());
}
}
-
template<class PassT>
-void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum)
-{
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
if (srcBlkInfo == NULL) {
@@ -2999,27 +2873,21 @@
srcBlkInfo->sccNum = sccNum;
}
-
template<class PassT>
-int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk)
-{
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
}
-
template<class PassT>
-void CFGStructurizer<PassT>::addToTraversalBlock(BlockT *srcBlk)
-{
+void CFGStructurizer<PassT>::addToTraversalBlock(BlockT *srcBlk) {
if (DEBUGME) {
errs() << "AddToTraversal BB" << srcBlk->getNumber() << "\n";
}
addedToTraversalBlks.push_back(srcBlk);
}
-
template<class PassT>
-void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk)
-{
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
if (DEBUGME) {
errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
}
@@ -3036,17 +2904,13 @@
assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
&& "can't retire block yet");
}
-
template<class PassT>
-bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk)
-{
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
return (srcBlkInfo && srcBlkInfo->isRetired);
}
-
template<class PassT>
-bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk)
-{
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
LoopT *loopRep = loopInfo->getLoopFor(curBlk);
while (loopRep && loopRep->getHeader() == curBlk) {
LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
@@ -3065,10 +2929,8 @@
return false;
} //isActiveLoophead
-
template<class PassT>
-bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk)
-{
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
const unsigned blockSizeThreshold = 30;
const unsigned cloneInstrThreshold = 100;
@@ -3081,19 +2943,18 @@
return ((blkSize > blockSizeThreshold)
&& (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
} //needMigrateBlock
-
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
- BlockTSmallerVector &exitBlks,
- std::set<BlockT *> &exitBlkSet)
-{
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
predIterEnd = landBlk->pred_end();
predIter != predIterEnd; ++predIter) {
BlockT *curBlk = *predIter;
+ if (curBlk == landBlk) continue; // in case landBlk is a single-block loop
if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
inpathBlks.push_back(curBlk);
}
@@ -3127,10 +2988,8 @@
return newLandBlk;
} // recordLoopbreakLand
-
template<class PassT>
-void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk)
-{
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
if (theEntry == NULL) {
@@ -3152,10 +3011,8 @@
<< " landing-block = BB" << blk->getNumber() << "\n";
}
} // setLoopLandBlock
-
template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum)
-{
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
if (theEntry == NULL) {
@@ -3170,10 +3027,8 @@
<< " regNum = " << regNum << "\n";
}
} // addLoopBreakOnReg
-
template<class PassT>
-void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum)
-{
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
if (theEntry == NULL) {
@@ -3187,10 +3042,9 @@
<< " regNum = " << regNum << "\n";
}
} // addLoopContOnReg
-
template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum)
-{
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep,
+ RegiT regNum) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
if (theEntry == NULL) {
@@ -3204,10 +3058,8 @@
<< " regNum = " << regNum << "\n";
}
} // addLoopBreakInitReg
-
template<class PassT>
-void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum)
-{
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
if (theEntry == NULL) {
@@ -3221,11 +3073,9 @@
<< " regNum = " << regNum << "\n";
}
} // addLoopContInitReg
-
template<class PassT>
void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
- RegiT regNum)
-{
+ RegiT regNum) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
if (theEntry == NULL) {
@@ -3233,35 +3083,29 @@
}
theEntry->endbranchInitRegs.insert(regNum);
- if (DEBUGME) {
+ if (DEBUGME)
+ {
errs() << "addLoopEndbranchInitReg loop-header = BB"
<< loopRep->getHeader()->getNumber()
<< " regNum = " << regNum << "\n";
}
} // addLoopEndbranchInitReg
-
template<class PassT>
typename CFGStructurizer<PassT>::LoopLandInfo *
-CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep)
-{
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
return theEntry;
} // getLoopLandInfo
-
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep)
-{
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
return theEntry ? theEntry->landBlk : NULL;
} // getLoopLandBlock
-
-
template<class PassT>
-bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk)
-{
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
LoopT *loopRep = loopInfo->getLoopFor(curBlk);
if (loopRep == NULL)
return false;
@@ -3269,20 +3113,15 @@
BlockT *loopHeader = loopRep->getHeader();
return curBlk->isSuccessor(loopHeader);
-
} //hasBackEdge
-
template<class PassT>
-unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep)
-{
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
return loopRep ? loopRep->getLoopDepth() : 0;
} //getLoopDepth
-
template<class PassT>
int CFGStructurizer<PassT>::countActiveBlock
-(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
- typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd)
-{
+ (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
int count = 0;
while (iterStart != iterEnd) {
if (!isRetiredBlock(*iterStart)) {
@@ -3293,15 +3132,12 @@
return count;
} //countActiveBlock
-
// This is work around solution for findNearestCommonDominator not avaiable to
// post dom a proper fix should go to Dominators.h.
template<class PassT>
typename CFGStructurizer<PassT>::BlockT*
-CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2)
-{
-
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
if (postDomTree->dominates(blk1, blk2)) {
return blk1;
}
@@ -3334,12 +3170,10 @@
return NULL;
}
-
template<class PassT>
typename CFGStructurizer<PassT>::BlockT *
CFGStructurizer<PassT>::findNearestCommonPostDom
-(typename std::set<BlockT *> &blks)
-{
+ (typename std::set<BlockT *> &blks) {
BlockT *commonDom;
typename std::set<BlockT *>::const_iterator iter = blks.begin();
typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
@@ -3361,19 +3195,16 @@
return commonDom;
} //findNearestCommonPostDom
-
} //end namespace llvm
//todo: move-end
-
//===----------------------------------------------------------------------===//
//
// CFGStructurizer for AMDIL
//
//===----------------------------------------------------------------------===//
-
using namespace llvmCFGStruct;
namespace llvm
@@ -3381,14 +3212,14 @@
class AMDILCFGStructurizer : public MachineFunctionPass
{
public:
- typedef MachineInstr InstructionType;
- typedef MachineFunction FunctionType;
- typedef MachineBasicBlock BlockType;
- typedef MachineLoopInfo LoopinfoType;
- typedef MachineDominatorTree DominatortreeType;
- typedef MachinePostDominatorTree PostDominatortreeType;
- typedef MachineDomTreeNode DomTreeNodeType;
- typedef MachineLoop LoopType;
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
//private:
const TargetInstrInfo *TII;
@@ -3402,18 +3233,14 @@
virtual bool runOnMachineFunction(MachineFunction &F) = 0;
private:
-
}; //end of class AMDILCFGStructurizer
//char AMDILCFGStructurizer::ID = 0;
} //end of namespace llvm
AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid)
- : MachineFunctionPass(pid), TII(NULL)
-{
+ : MachineFunctionPass(pid), TII(NULL) {
}
-
-const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const
-{
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
return TII;
}
//===----------------------------------------------------------------------===//
@@ -3422,14 +3249,15 @@
//
//===----------------------------------------------------------------------===//
+namespace llvm
+{
+extern void initializeAMDILCFGPreparePass(llvm::PassRegistry&);
+}
using namespace llvmCFGStruct;
namespace llvm
{
-
-extern void initializeAMDILCFGPreparePass(PassRegistry&);
-
class AMDILCFGPrepare : public AMDILCFGStructurizer
{
public:
@@ -3444,7 +3272,6 @@
bool runOnMachineFunction(MachineFunction &F);
private:
-
}; //end of class AMDILCFGPrepare
char AMDILCFGPrepare::ID = 0;
@@ -3455,34 +3282,31 @@
{
initializeAMDILCFGPreparePass(*PassRegistry::getPassRegistry());
}
-const char *AMDILCFGPrepare::getPassName() const
-{
+const char *AMDILCFGPrepare::getPassName() const {
return "AMD IL Control Flow Graph Preparation Pass";
}
-
-void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const
-{
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineFunctionAnalysis>();
AU.addRequired<MachineFunctionAnalysis>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
}
-
//===----------------------------------------------------------------------===//
//
// CFGPerform
//
//===----------------------------------------------------------------------===//
+namespace llvm
+{
+extern void initializeAMDILCFGPerformPass(llvm::PassRegistry&);
+}
using namespace llvmCFGStruct;
namespace llvm
{
-
-extern void initializeAMDILCFGPerformPass(PassRegistry&);
-
class AMDILCFGPerform : public AMDILCFGStructurizer
{
public:
@@ -3495,7 +3319,6 @@
bool runOnMachineFunction(MachineFunction &F);
private:
-
}; //end of class AMDILCFGPerform
char AMDILCFGPerform::ID = 0;
@@ -3506,21 +3329,16 @@
{
initializeAMDILCFGPerformPass(*PassRegistry::getPassRegistry());
}
-
-const char *AMDILCFGPerform::getPassName() const
-{
+const char *AMDILCFGPerform::getPassName() const {
return "AMD IL Control Flow Graph structurizer Pass";
}
-
-void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const
-{
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineFunctionAnalysis>();
AU.addRequired<MachineFunctionAnalysis>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
}
-
//===----------------------------------------------------------------------===//
//
// CFGStructTraits<AMDILCFGStructurizer>
@@ -3531,63 +3349,125 @@
{
// this class is tailor to the AMDIL backend
template<>
-struct CFGStructTraits<AMDILCFGStructurizer> {
+struct CFGStructTraits<AMDILCFGStructurizer>
+{
typedef int RegiT;
static int getBreakNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALNZ);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf64br: return AMDIL::BREAK_LOGICALNZf64r;
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHf32br: return AMDIL::BREAK_LOGICALNZf32r;
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi64br: return AMDIL::BREAK_LOGICALNZi64r;
+ case AMDIL::BRANCHi32bi:
+ case AMDIL::BRANCHi32br: return AMDIL::BREAK_LOGICALNZi32r;
+ case AMDIL::BRANCHi16bi:
+ case AMDIL::BRANCHi16br: return AMDIL::BREAK_LOGICALNZi16r;
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHi8br: return AMDIL::BREAK_LOGICALNZi8r;
default:
assert(0 && "internal error");
};
return -1;
}
-
static int getBreakZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::BREAK_LOGICALZ);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf64br: return AMDIL::BREAK_LOGICALZf64r;
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHf32br: return AMDIL::BREAK_LOGICALZf32r;
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi64br: return AMDIL::BREAK_LOGICALZi64r;
+ case AMDIL::BRANCHi32bi:
+ case AMDIL::BRANCHi32br: return AMDIL::BREAK_LOGICALZi32r;
+ case AMDIL::BRANCHi16bi:
+ case AMDIL::BRANCHi16br: return AMDIL::BREAK_LOGICALZi16r;
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHi8br: return AMDIL::BREAK_LOGICALZi8r;
default:
assert(0 && "internal error");
};
return -1;
}
-
static int getBranchNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALNZ);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf64br: return AMDIL::IF_LOGICALNZf64r;
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHf32br: return AMDIL::IF_LOGICALNZf32r;
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi64br: return AMDIL::IF_LOGICALNZi64r;
+ case AMDIL::BRANCHi32bi:
+ case AMDIL::BRANCHi32br: return AMDIL::IF_LOGICALNZi32r;
+ case AMDIL::BRANCHi16bi:
+ case AMDIL::BRANCHi16br: return AMDIL::IF_LOGICALNZi16r;
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHi8br: return AMDIL::IF_LOGICALNZi8r;
default:
assert(0 && "internal error");
};
return -1;
}
-
static int getBranchZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::IF_LOGICALZ);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf64br: return AMDIL::IF_LOGICALZf64r;
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHf32br: return AMDIL::IF_LOGICALZf32r;
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi64br: return AMDIL::IF_LOGICALZi64r;
+ case AMDIL::BRANCHi32bi:
+ case AMDIL::BRANCHi32br: return AMDIL::IF_LOGICALZi32r;
+ case AMDIL::BRANCHi16br:
+ case AMDIL::BRANCHi16bi: return AMDIL::IF_LOGICALZi16r;
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHi8br: return AMDIL::IF_LOGICALZi8r;
default:
assert(0 && "internal error");
};
return -1;
}
-
- static int getContinueNzeroOpcode(int oldOpcode) {
+ static int getContinueNzeroOpcode(int oldOpcode)
+ {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALNZ);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf64br: return AMDIL::CONTINUE_LOGICALNZf64r;
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHf32br: return AMDIL::CONTINUE_LOGICALNZf32r;
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi64br: return AMDIL::CONTINUE_LOGICALNZi64r;
+ case AMDIL::BRANCHi32bi:
+ case AMDIL::BRANCHi32br: return AMDIL::CONTINUE_LOGICALNZi32r;
+ case AMDIL::BRANCHi16bi:
+ case AMDIL::BRANCHi16br: return AMDIL::CONTINUE_LOGICALNZi16r;
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHi8br: return AMDIL::CONTINUE_LOGICALNZi8r;
default:
assert(0 && "internal error");
};
return -1;
}
-
static int getContinueZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
- ExpandCaseToAllScalarReturn(AMDIL::BRANCH_COND, AMDIL::CONTINUE_LOGICALZ);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf64br: return AMDIL::CONTINUE_LOGICALZf64r;
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHf32br: return AMDIL::CONTINUE_LOGICALZf32r;
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi64br: return AMDIL::CONTINUE_LOGICALZi64r;
+ case AMDIL::BRANCHi32br:
+ case AMDIL::BRANCHi32bi: return AMDIL::CONTINUE_LOGICALZi32r;
+ case AMDIL::BRANCHi16br:
+ case AMDIL::BRANCHi16bi: return AMDIL::CONTINUE_LOGICALZi16r;
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHi8br: return AMDIL::CONTINUE_LOGICALZi8r;
default:
assert(0 && "internal error");
};
return -1;
}
-
// the explicitly represented branch target is the true branch target
#define getExplicitBranch getTrueBranch
#define setExplicitBranch setTrueBranch
@@ -3595,11 +3475,9 @@
static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
return instr->getOperand(0).getMBB();
}
-
static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
instr->getOperand(0).setMBB(blk);
}
-
static MachineBasicBlock *
getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
assert(blk->succ_size() == 2);
@@ -3610,41 +3488,41 @@
return (*iter == trueBranch) ? *iterNext : *iter;
}
-
static bool isCondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
- ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ case AMDIL::BRANCHf64bi:
+ case AMDIL::BRANCHf32bi:
+ case AMDIL::BRANCHi64bi:
+ case AMDIL::BRANCHi32bi:
+ case AMDIL::BRANCHi16bi:
+ case AMDIL::BRANCHi8bi:
+ case AMDIL::BRANCHf64br:
+ case AMDIL::BRANCHf32br:
+ case AMDIL::BRANCHi64br:
+ case AMDIL::BRANCHi32br:
+ case AMDIL::BRANCHi16br:
+ case AMDIL::BRANCHi8br:
break;
default:
return false;
}
return true;
}
-
static bool isUncondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
- case AMDIL::BRANCH:
- break;
- default:
- return false;
- }
- return true;
- }
-
- static bool isPhimove(MachineInstr *instr) {
- switch (instr->getOpcode()) {
- ExpandCaseToAllTypes(AMDIL::MOVE);
+ case AMDIL::BRANCHb:
break;
default:
return false;
}
return true;
}
-
static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
//get DebugLoc from the first MachineBasicBlock instruction with debug info
DebugLoc DL;
- for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ for (MachineBasicBlock::iterator iter = blk->begin();
+ iter != blk->end();
+ ++iter) {
MachineInstr *instr = &(*iter);
if (instr->getDebugLoc().isUnknown() == false) {
DL = instr->getDebugLoc();
@@ -3652,7 +3530,6 @@
}
return DL;
}
-
static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
MachineBasicBlock::reverse_iterator iter = blk->rbegin();
MachineInstr *instr = &*iter;
@@ -3661,7 +3538,6 @@
}
return NULL;
}
-
// The correct naming for this is getPossibleLoopendBlockBranchInstr.
//
// BB with backward-edge could have move instructions after the branch
@@ -3675,14 +3551,13 @@
if (instr) {
if (isCondBranch(instr) || isUncondBranch(instr)) {
return instr;
- } else if (!isPhimove(instr)) {
+ } else if (instr->getOpcode() == TargetOpcode::COPY) {
break;
}
}
}
return NULL;
}
-
static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
MachineBasicBlock::reverse_iterator iter = blk->rbegin();
if (iter != blk->rend()) {
@@ -3693,7 +3568,6 @@
}
return NULL;
}
-
static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
MachineBasicBlock::reverse_iterator iter = blk->rbegin();
if (iter != blk->rend()) {
@@ -3704,17 +3578,18 @@
}
return NULL;
}
-
static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
- for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ for (MachineBasicBlock::iterator iter = blk->begin();
+ (iter != blk->end());
+ ++iter) {
MachineInstr *instr = &(*iter);
- if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDIL::BREAK_LOGICALZ_i32)) {
+ if ((instr->getOpcode() == AMDIL::BREAK_LOGICALNZi32r) ||
+ (instr->getOpcode() == AMDIL::BREAK_LOGICALZi32r)) {
return instr;
}
}
return NULL;
}
-
static bool isReturnBlock(MachineBasicBlock *blk) {
MachineInstr *instr = getReturnInstr(blk);
bool isReturn = (blk->succ_size() == 0);
@@ -3727,9 +3602,8 @@
}
}
- return isReturn;
+ return isReturn;
}
-
static MachineBasicBlock::iterator
getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
assert(instr->getParent() == blk && "instruction doesn't belong to block");
@@ -3741,15 +3615,15 @@
assert(iter != iterEnd);
return iter;
- }//getInstrPos
-
+ } //getInstrPos
static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
AMDILCFGStructurizer *passRep) {
return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
} //insertInstrBefore
-
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
- AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ DebugLoc DL) {
const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
MachineInstr *newInstr =
blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
@@ -3765,12 +3639,10 @@
return newInstr;
} //insertInstrBefore
-
static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
AMDILCFGStructurizer *passRep) {
insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
} //insertInstrEnd
-
static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
AMDILCFGStructurizer *passRep, DebugLoc DL) {
const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
@@ -3782,7 +3654,6 @@
SHOWNEWINSTR(newInstr);
} //insertInstrEnd
-
static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
int newOpcode,
AMDILCFGStructurizer *passRep,
@@ -3799,13 +3670,11 @@
SHOWNEWINSTR(newInstr);
return newInstr;
} //insertInstrBefore
-
static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
int newOpcode,
AMDILCFGStructurizer *passRep) {
return insertInstrBefore(instrPos, newOpcode, passRep, DebugLoc());
} //insertInstrBefore
-
static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
int newOpcode,
AMDILCFGStructurizer *passRep,
@@ -3818,13 +3687,20 @@
DL);
blk->insert(instrPos, newInstr);
- MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
- false);
+ if (oldInstr->getOperand(1).isReg()) {
+ MachineInstrBuilder(newInstr).addReg(
+ oldInstr->getOperand(1).getReg(), false);
+ } else if (oldInstr->getOperand(1).isImm()) {
+ MachineInstrBuilder(newInstr).addImm(
+ oldInstr->getOperand(1).getImm());
+ } else if (oldInstr->getOperand(1).isFPImm()) {
+ MachineInstrBuilder(newInstr).addFPImm(
+ oldInstr->getOperand(1).getFPImm());
+ }
SHOWNEWINSTR(newInstr);
//erase later oldInstr->eraseFromParent();
} //insertCondBranchBefore
-
static void insertCondBranchBefore(MachineBasicBlock *blk,
MachineBasicBlock::iterator insertPos,
int newOpcode,
@@ -3842,7 +3718,6 @@
SHOWNEWINSTR(newInstr);
} //insertCondBranchBefore
-
static void insertCondBranchEnd(MachineBasicBlock *blk,
int newOpcode,
AMDILCFGStructurizer *passRep,
@@ -3856,8 +3731,6 @@
SHOWNEWINSTR(newInstr);
} //insertCondBranchEnd
-
-
static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
AMDILCFGStructurizer *passRep,
RegiT regNum, int regVal) {
@@ -3865,7 +3738,7 @@
const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
MachineBasicBlock *blk = oldInstr->getParent();
MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONSTi32),
DebugLoc());
MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
MachineInstrBuilder(newInstr).addImm(regVal); //set src value
@@ -3874,14 +3747,13 @@
SHOWNEWINSTR(newInstr);
} //insertAssignInstrBefore
-
static void insertAssignInstrBefore(MachineBasicBlock *blk,
AMDILCFGStructurizer *passRep,
RegiT regNum, int regVal) {
const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONST_i32),
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::LOADCONSTi32),
DebugLoc());
MachineInstrBuilder(newInstr).addReg(regNum, RegState::Define); //set target
MachineInstrBuilder(newInstr).addImm(regVal); //set src value
@@ -3893,9 +3765,7 @@
}
SHOWNEWINSTR(newInstr);
-
} //insertInstrBefore
-
static void insertCompareInstrBefore(MachineBasicBlock *blk,
MachineBasicBlock::iterator instrPos,
AMDILCFGStructurizer *passRep,
@@ -3903,7 +3773,7 @@
RegiT src2Reg) {
const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
MachineInstr *newInstr =
- blk->getParent()->CreateMachineInstr(tii->get(AMDIL::IEQ), DebugLoc());
+ blk->getParent()->CreateMachineInstr(tii->get(AMDIL::EQi32rr), DebugLoc());
MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
@@ -3911,9 +3781,7 @@
blk->insert(instrPos, newInstr);
SHOWNEWINSTR(newInstr);
-
} //insertCompareInstrBefore
-
static void cloneSuccessorList(MachineBasicBlock *dstBlk,
MachineBasicBlock *srcBlk) {
for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
@@ -3921,7 +3789,6 @@
dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
}
} //cloneSuccessorList
-
static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
MachineFunction *func = srcBlk->getParent();
MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
@@ -3939,7 +3806,6 @@
}
return newBlk;
}
-
//MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
//the AMDIL instruction is not recognized as terminator fix this and retire
//this routine
@@ -3952,7 +3818,6 @@
setExplicitBranch(branchInstr, newBlk);
}
}
-
static void wrapup(MachineBasicBlock *entryBlk) {
assert((!entryBlk->getParent()->getJumpTableInfo()
|| entryBlk->getParent()->getJumpTableInfo()->isEmpty())
@@ -3970,7 +3835,7 @@
}
pre = iter;
++iter;
- } //end while
+ } //end while
//delete continue right before endloop
for (unsigned i = 0; i < contInstr.size(); ++i) {
@@ -3981,18 +3846,14 @@
// (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
// there isn't such an interface yet. alternatively, replace all the other
// blocks in the jump table with the entryBlk //}
-
} //wrapup
-
static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
return &pass.getAnalysis<MachineDominatorTree>();
}
-
static MachinePostDominatorTree*
getPostDominatorTree(AMDILCFGStructurizer &pass) {
return &pass.getAnalysis<MachinePostDominatorTree>();
}
-
static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
return &pass.getAnalysis<MachineLoopInfo>();
}
@@ -4009,7 +3870,7 @@
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo);
INITIALIZE_PASS_END(AMDILCFGPrepare, "amdcfgprepare",
"AMD IL Control Flow Graph Preparation Pass",
- false, false)
+ false, false);
INITIALIZE_PASS_BEGIN(AMDILCFGPerform, "amdcfgperform",
"AMD IL Control Flow Graph structurizer Pass",
@@ -4019,39 +3880,25 @@
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo);
INITIALIZE_PASS_END(AMDILCFGPerform, "amdcfgperform",
"AMD IL Control Flow Graph structurizer Pass",
- false, false)
-
-namespace llvm
-{
-FunctionPass *createAMDILCFGPreparationPass();
-FunctionPass *createAMDILCFGStructurizerPass();
-}
+ false, false);
// createAMDILCFGPreparationPass- Returns a pass
-FunctionPass *llvm::createAMDILCFGPreparationPass()
-{
+FunctionPass *llvm::createAMDILCFGPreparationPass() {
return new AMDILCFGPrepare();
}
-
-bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func)
-{
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
TII = func.getTarget().getInstrInfo();
return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
- *this);
+ *this);
}
-
// createAMDILCFGStructurizerPass- Returns a pass
-FunctionPass *llvm::createAMDILCFGStructurizerPass()
-{
+FunctionPass *llvm::createAMDILCFGStructurizerPass() {
return new AMDILCFGPerform();
}
-
-bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func)
-{
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
TII = func.getTarget().getInstrInfo();
return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
- *this);
+ *this);
}
-
//end of file newline goes below
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerErrors.h Wed Sep 12 12:43:34 2012
@@ -16,9 +16,7 @@
// Compiler errors generated by the backend that will cause
// the runtime to abort compilation. These are mainly for
// device constraint violations or invalid code.
-namespace amd
-{
-
+namespace amd {
#define INVALID_COMPUTE 0
#define GENERIC_ERROR 1
#define INTERNAL_ERROR 2
@@ -50,7 +48,8 @@
#define NO_FLAT_SUPPORT 28
#define NUM_ERROR_MESSAGES 29
-static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] = {
+static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] =
+{
"E000:Compute Shader Not Supported! ",
"E001:Generic Compiler Error Message! ",
"E002:Internal Compiler Error Message!",
@@ -81,7 +80,6 @@
"E027:Semaphore init value is invalid!",
"E028:Flat address is not supported! "
};
-
}
#endif // _AMDIL_COMPILER_ERRORS_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILCompilerWarnings.h Wed Sep 12 12:43:34 2012
@@ -16,21 +16,19 @@
/// Compiler backend generated warnings that might cause
/// issues with compilation. These warnings become errors if
/// -Werror is specified on the command line.
-namespace amd
-{
-
+namespace amd {
#define LIMIT_BARRIER 0
#define BAD_BARRIER_OPT 1
#define RECOVERABLE_ERROR 2
#define NUM_WARN_MESSAGES 3
-static const char *CompilerWarningMessage[NUM_WARN_MESSAGES] = {
+static const char *CompilerWarningMessage[NUM_WARN_MESSAGES] =
+{
/// All warnings must be prefixed with the W token or they might be
/// treated as errors.
"W000:Barrier caused limited groupsize",
"W001:Dangerous Barrier Opt Detected! ",
"W002:Recoverable BE Error Detected! "
-
};
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILConversions.td Wed Sep 12 12:43:34 2012
@@ -11,1033 +11,1033 @@
//
//===----------------------------------------------------------------------===//
-def actos_i16:Pat < (i16 (anyext GPRI8:$src)),
-(IL_ASSHORT_i32
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))) >;
-
-
-def uctos_i16:Pat < (i16 (zext GPRI8:$src)),
-(IL_ASSHORT_i32
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))) >;
-
-
-def sctos_i16:Pat < (i16 (sext GPRI8:$src)),
-(IL_ASSHORT_i32
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))) >;
-
-
-def actoi_i32:Pat < (i32 (anyext GPRI8:$src)),
-(IL_ASINT_i32
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))) >;
-
-
-def uctoi_i32:Pat < (i32 (zext GPRI8:$src)),
-(IL_ASINT_i32
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))) >;
-
-
-def sctoi_i32:Pat < (i32 (sext GPRI8:$src)),
-(IL_ASINT_i32
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))) >;
-
-
-def actol_i64:Pat < (i64 (anyext GPRI8:$src)),
-(LCREATE
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 0)) >;
-
-
-def uctol_i64:Pat < (i64 (zext GPRI8:$src)),
-(LCREATE
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 0)) >;
-
-
-def sctol_i64:Pat < (i64 (sext GPRI8:$src)),
-(LCREATE
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24)),
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 31))) >;
-
-
-def astoi_i32:Pat < (i32 (anyext GPRI16:$src)),
-(IL_ASINT_i32
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16))) >;
-
-
-def ustoi_i32:Pat < (i32 (zext GPRI16:$src)),
-(IL_ASINT_i32
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16))) >;
-
-
-def sstoi_i32:Pat < (i32 (sext GPRI16:$src)),
-(IL_ASINT_i32
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16))) >;
-
-
-def astol_i64:Pat < (i64 (anyext GPRI16:$src)),
-(LCREATE
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 0)) >;
-
-
-def ustol_i64:Pat < (i64 (zext GPRI16:$src)),
-(LCREATE
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 0)) >;
-
-
-def sstol_i64:Pat < (i64 (sext GPRI16:$src)),
-(LCREATE
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16)),
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 31))) >;
-
-
-def aitol_i64:Pat < (i64 (anyext GPRI32:$src)),
-(LCREATE
-(IL_ASINT_i32 GPRI32:$src),
- (LOADCONST_i32 0)) >;
-
-
-def uitol_i64:Pat < (i64 (zext GPRI32:$src)),
-(LCREATE
-(IL_ASINT_i32 GPRI32:$src),
- (LOADCONST_i32 0)) >;
-
-
-def sitol_i64:Pat < (i64 (sext GPRI32:$src)),
-(LCREATE
-(IL_ASINT_i32 GPRI32:$src),
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i32 GPRI32:$src),
- (LOADCONST_i32 0)),
- (LOADCONST_i32 31))) >;
+def actosi16rr:Pat < (i16 (anyext GPRI8:$src)),
+(IL_ASSHORTi32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))) >;
+
+
+def uctosi16rr:Pat < (i16 (zext GPRI8:$src)),
+(IL_ASSHORTi32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))) >;
+
+
+def sctosi16rr:Pat < (i16 (sext GPRI8:$src)),
+(IL_ASSHORTi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))) >;
+
+
+def actoii32rr:Pat < (i32 (anyext GPRI8:$src)),
+(IL_ASINTi32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))) >;
+
+
+def uctoii32rr:Pat < (i32 (zext GPRI8:$src)),
+(IL_ASINTi32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))) >;
+
+
+def sctoii32rr:Pat < (i32 (sext GPRI8:$src)),
+(IL_ASINTi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))) >;
+
+
+def actoli64rr:Pat < (i64 (anyext GPRI8:$src)),
+(LCREATEi64rr
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 0)) >;
+
+
+def uctoli64rr:Pat < (i64 (zext GPRI8:$src)),
+(LCREATEi64rr
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 0)) >;
+
+
+def sctoli64rr:Pat < (i64 (sext GPRI8:$src)),
+(LCREATEi64rr
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24)),
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 31))) >;
+
+
+def astoii32rr:Pat < (i32 (anyext GPRI16:$src)),
+(IL_ASINTi32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))) >;
+
+
+def ustoii32rr:Pat < (i32 (zext GPRI16:$src)),
+(IL_ASINTi32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))) >;
+
+
+def sstoii32rr:Pat < (i32 (sext GPRI16:$src)),
+(IL_ASINTi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))) >;
+
+
+def astoli64rr:Pat < (i64 (anyext GPRI16:$src)),
+(LCREATEi64rr
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 0)) >;
+
+
+def ustoli64rr:Pat < (i64 (zext GPRI16:$src)),
+(LCREATEi64rr
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 0)) >;
+
+
+def sstoli64rr:Pat < (i64 (sext GPRI16:$src)),
+(LCREATEi64rr
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16)),
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 31))) >;
+
+
+def aitoli64rr:Pat < (i64 (anyext GPRI32:$src)),
+(LCREATEi64rr
+(IL_ASINTi32r GPRI32:$src),
+ (LOADCONSTi32 0)) >;
+
+
+def uitoli64rr:Pat < (i64 (zext GPRI32:$src)),
+(LCREATEi64rr
+(IL_ASINTi32r GPRI32:$src),
+ (LOADCONSTi32 0)) >;
+
+
+def sitoli64rr:Pat < (i64 (sext GPRI32:$src)),
+(LCREATEi64rr
+(IL_ASINTi32r GPRI32:$src),
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi32r GPRI32:$src),
+ (LOADCONSTi32 0)),
+ (LOADCONSTi32 31))) >;
-def sctof_f32:Pat < (f32 (sint_to_fp GPRI8:$src)),
+def sctoff32rr:Pat < (f32 (sint_to_fp GPRI8:$src)),
(f32
- (ITOF
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24)))) >;
+ (ITOFf32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24)))) >;
-def uctof_f32:Pat < (f32 (uint_to_fp GPRI8:$src)),
+def uctoff32rr:Pat < (f32 (uint_to_fp GPRI8:$src)),
(f32
- (UTOF
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24)))) >;
+ (UTOFf32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24)))) >;
-def ftosc_i8:Pat < (i8 (fp_to_sint GPRF32:$src)),
+def ftosci8rr:Pat < (i8 (fp_to_sint GPRF32:$src)),
(i8
- (IL_ASCHAR_i32
- (BINARY_AND_i32
-(FTOI GPRF32:$src),
- (LOADCONST_i32 0x000000FF)))) >;
+ (IL_ASCHARi32r
+ (ANDi32rr
+(FTOIi32r GPRF32:$src),
+ (LOADCONSTi32 0x000000FF)))) >;
-def ftouc_i8:Pat < (i8 (fp_to_uint GPRF32:$src)),
+def ftouci8rr:Pat < (i8 (fp_to_uint GPRF32:$src)),
(i8
- (IL_ASCHAR_i32
- (BINARY_AND_i32
-(FTOU GPRF32:$src),
- (LOADCONST_i32 0x000000FF)))) >;
-
-
-def sctod_f64:Pat < (f64 (sint_to_fp GPRI8:$src)),
-(f64 (FTOD
- (ITOF
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))))) >;
-
-
-def uctod_f64:Pat < (f64 (uint_to_fp GPRI8:$src)),
-(f64 (FTOD
- (UTOF
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i8 GPRI8:$src),
- (LOADCONST_i32 24)),
- (LOADCONST_i32 24))))) >;
+ (IL_ASCHARi32r
+ (ANDi32rr
+(FTOUi32r GPRF32:$src),
+ (LOADCONSTi32 0x000000FF)))) >;
+
+
+def sctodf64rr:Pat < (f64 (sint_to_fp GPRI8:$src)),
+(f64 (FTODr
+ (ITOFf32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))))) >;
+
+
+def uctodf64rr:Pat < (f64 (uint_to_fp GPRI8:$src)),
+(f64 (FTODr
+ (UTOFf32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi8r GPRI8:$src),
+ (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))))) >;
-def dtosc_i8:Pat < (i8 (fp_to_sint GPRF64:$src)),
+def dtosci8rr:Pat < (i8 (fp_to_sint GPRF64:$src)),
(i8
- (IL_ASCHAR_i32
- (BINARY_AND_i32
-(FTOI (DTOF GPRF64:$src)),
- (LOADCONST_i32 0x000000FF)))) >;
+ (IL_ASCHARi32r
+ (ANDi32rr
+(FTOIi32r (DTOFr GPRF64:$src)),
+ (LOADCONSTi32 0x000000FF)))) >;
-def dtouc_i8:Pat < (i8 (fp_to_uint GPRF64:$src)),
+def dtouci8rr:Pat < (i8 (fp_to_uint GPRF64:$src)),
(i8
- (IL_ASCHAR_i32
- (BINARY_AND_i32
-(FTOU (DTOF GPRF64:$src)),
- (LOADCONST_i32 0x000000FF)))) >;
+ (IL_ASCHARi32r
+ (ANDi32rr
+(FTOUi32r (DTOFr GPRF64:$src)),
+ (LOADCONSTi32 0x000000FF)))) >;
-def sstof_f32:Pat < (f32 (sint_to_fp GPRI16:$src)),
+def sstoff32rr:Pat < (f32 (sint_to_fp GPRI16:$src)),
(f32
- (ITOF
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16)))) >;
+ (ITOFf32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16)))) >;
-def ustof_f32:Pat < (f32 (uint_to_fp GPRI16:$src)),
+def ustoff32rr:Pat < (f32 (uint_to_fp GPRI16:$src)),
(f32
- (UTOF
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16)))) >;
+ (UTOFf32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16)))) >;
-def ftoss_i16:Pat < (i16 (fp_to_sint GPRF32:$src)),
+def ftossi16rr:Pat < (i16 (fp_to_sint GPRF32:$src)),
(i16
- (IL_ASSHORT_i32
- (BINARY_AND_i32
-(FTOI GPRF32:$src),
- (LOADCONST_i32 0x0000FFFF)))) >;
+ (IL_ASSHORTi32r
+ (ANDi32rr
+(FTOIi32r GPRF32:$src),
+ (LOADCONSTi32 0x0000FFFF)))) >;
-def ftous_i16:Pat < (i16 (fp_to_uint GPRF32:$src)),
+def ftousi16rr:Pat < (i16 (fp_to_uint GPRF32:$src)),
(i16
- (IL_ASSHORT_i32
- (BINARY_AND_i32
-(FTOU GPRF32:$src),
- (LOADCONST_i32 0x0000FFFF)))) >;
-
-
-def sstod_f64:Pat < (f64 (sint_to_fp GPRI16:$src)),
-(f64 (FTOD
- (ITOF
- (SHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16))))) >;
-
-
-def ustod_f64:Pat < (f64 (uint_to_fp GPRI16:$src)),
-(f64 (FTOD
- (UTOF
- (USHR_i32
- (SHL_i32
-(IL_ASINT_i16 GPRI16:$src),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16))))) >;
+ (IL_ASSHORTi32r
+ (ANDi32rr
+(FTOUi32r GPRF32:$src),
+ (LOADCONSTi32 0x0000FFFF)))) >;
+
+
+def sstodf64rr:Pat < (f64 (sint_to_fp GPRI16:$src)),
+(f64 (FTODr
+ (ITOFf32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))))) >;
+
+
+def ustodf64rr:Pat < (f64 (uint_to_fp GPRI16:$src)),
+(f64 (FTODr
+ (UTOFf32r
+ (USHRi32i32rr
+ (SHLi32i32rr
+(IL_ASINTi16r GPRI16:$src),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))))) >;
-def dtoss_i16:Pat < (i16 (fp_to_sint GPRF64:$src)),
+def dtossi16rr:Pat < (i16 (fp_to_sint GPRF64:$src)),
(i16
- (IL_ASSHORT_i32
- (BINARY_AND_i32
-(FTOI (DTOF GPRF64:$src)),
- (LOADCONST_i32 0x0000FFFF)))) >;
+ (IL_ASSHORTi32r
+ (ANDi32rr
+(FTOIi32r (DTOFr GPRF64:$src)),
+ (LOADCONSTi32 0x0000FFFF)))) >;
-def dtous_i16:Pat < (i16 (fp_to_uint GPRF64:$src)),
+def dtousi16rr:Pat < (i16 (fp_to_uint GPRF64:$src)),
(i16
- (IL_ASSHORT_i32
- (BINARY_AND_i32
-(FTOU (DTOF GPRF64:$src)),
- (LOADCONST_i32 0x0000FFFF)))) >;
+ (IL_ASSHORTi32r
+ (ANDi32rr
+(FTOUi32r (DTOFr GPRF64:$src)),
+ (LOADCONSTi32 0x0000FFFF)))) >;
-def stoc_i8:Pat < (i8 (trunc GPRI16:$src)),
-(IL_ASCHAR_i32
- (SHR_i32
- (SHL_i32
- (IL_ASINT_i16
-(BINARY_AND_i16 GPRI16:$src,
- (LOADCONST_i16 0x000000FF))),
- (LOADCONST_i32 16)),
- (LOADCONST_i32 16))
+def stoci8rr:Pat < (i8 (trunc GPRI16:$src)),
+(IL_ASCHARi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+ (IL_ASINTi16r
+(ANDi16rr GPRI16:$src,
+ (LOADCONSTi16 0x000000FF))),
+ (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))
)>;
-def itoc_i8:Pat < (i8 (trunc GPRI32:$src)),
-(IL_ASCHAR_i32
- (SHR_i32
- (SHL_i32
- (IL_ASINT_i32
-(BINARY_AND_i32 GPRI32:$src,
- (LOADCONST_i32 0x000000FF)))
- , (LOADCONST_i32 24)),
- (LOADCONST_i32 24))
+def itoci8rr:Pat < (i8 (trunc GPRI32:$src)),
+(IL_ASCHARi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+ (IL_ASINTi32r
+(ANDi32rr GPRI32:$src,
+ (LOADCONSTi32 0x000000FF)))
+ , (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))
) >;
-def itos_i16:Pat < (i16 (trunc GPRI32:$src)),
-(IL_ASSHORT_i32
- (SHR_i32
- (SHL_i32
- (IL_ASINT_i32
-(BINARY_AND_i32 GPRI32:$src,
- (LOADCONST_i32 0x0000FFFF)))
- , (LOADCONST_i32 16)),
- (LOADCONST_i32 16))
+def itosi16rr:Pat < (i16 (trunc GPRI32:$src)),
+(IL_ASSHORTi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+ (IL_ASINTi32r
+(ANDi32rr GPRI32:$src,
+ (LOADCONSTi32 0x0000FFFF)))
+ , (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))
) >;
-def ltoc_i8:Pat < (i8 (trunc GPRI64:$src)),
-(IL_ASCHAR_i32
- (SHR_i32
- (SHL_i32
- (BINARY_AND_i32
-(LLO GPRI64:$src),
- (LOADCONST_i32 0x000000FF))
- , (LOADCONST_i32 24)),
- (LOADCONST_i32 24))
+def ltoci8rr:Pat < (i8 (trunc GPRI64:$src)),
+(IL_ASCHARi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+ (ANDi32rr
+(LLOi64r GPRI64:$src),
+ (LOADCONSTi32 0x000000FF))
+ , (LOADCONSTi32 24)),
+ (LOADCONSTi32 24))
) >;
-def ltos_i16:Pat < (i16 (trunc GPRI64:$src)),
-(IL_ASSHORT_i32
- (SHR_i32
- (SHL_i32
- (BINARY_AND_i32
-(LLO GPRI64:$src),
- (LOADCONST_i32 0x0000FFFF))
- , (LOADCONST_i32 16)),
- (LOADCONST_i32 16))
+def ltosi16rr:Pat < (i16 (trunc GPRI64:$src)),
+(IL_ASSHORTi32r
+ (SHRi32i32rr
+ (SHLi32i32rr
+ (ANDi32rr
+(LLOi64r GPRI64:$src),
+ (LOADCONSTi32 0x0000FFFF))
+ , (LOADCONSTi32 16)),
+ (LOADCONSTi32 16))
) >;
-def ltoi_i32:Pat < (i32 (trunc GPRI64:$src)), (LLO GPRI64:$src) >;
+def ltoii32rr:Pat < (i32 (trunc GPRI64:$src)), (LLOi64r GPRI64:$src) >;
-def actos_v2i16:Pat < (v2i16 (anyext GPRV2I8:$src)),
-(IL_ASV2SHORT_v2i32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
-
-
-def uctos_v2i16:Pat < (v2i16 (zext GPRV2I8:$src)),
-(IL_ASV2SHORT_v2i32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
-
-
-def sctos_v2i16:Pat < (v2i16 (sext GPRV2I8:$src)),
-(IL_ASV2SHORT_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
-
-
-def actoi_v2i32:Pat < (v2i32 (anyext GPRV2I8:$src)),
-(IL_ASV2INT_v2i32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
-
-
-def uctoi_v2i32:Pat < (v2i32 (zext GPRV2I8:$src)),
-(IL_ASV2INT_v2i32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
-
-
-def sctoi_v2i32:Pat < (v2i32 (sext GPRV2I8:$src)),
-(IL_ASV2INT_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))) >;
-
-
-def actol_v2i64:Pat < (v2i64 (anyext GPRV2I8:$src)),
-(LCREATE_v2i64
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 0))) >;
-
-
-def uctol_v2i64:Pat < (v2i64 (zext GPRV2I8:$src)),
-(LCREATE_v2i64
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 0))) >;
-
-
-def sctol_v2i64:Pat < (v2i64 (sext GPRV2I8:$src)),
-(LCREATE_v2i64
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
-
-
-def astoi_v2i32:Pat < (v2i32 (anyext GPRV2I16:$src)),
-(IL_ASV2INT_v2i32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
-
-
-def ustoi_v2i32:Pat < (v2i32 (zext GPRV2I16:$src)),
-(IL_ASV2INT_v2i32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
-
-
-def sstoi_v2i32:Pat < (v2i32 (sext GPRV2I16:$src)),
-(IL_ASV2INT_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))) >;
-
-
-def astol_v2i64:Pat < (v2i64 (anyext GPRV2I16:$src)),
-(LCREATE_v2i64
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 0))) >;
-
-
-def ustol_v2i64:Pat < (v2i64 (zext GPRV2I16:$src)),
-(LCREATE_v2i64
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 0))) >;
-
-
-def sstol_v2i64:Pat < (v2i64 (sext GPRV2I16:$src)),
-(LCREATE_v2i64
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
-
-
-def aitol_v2i64:Pat < (v2i64 (anyext GPRV2I32:$src)),
-(LCREATE_v2i64
-(IL_ASV2INT_v2i32 GPRV2I32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0))) >;
-
-
-def uitol_v2i64:Pat < (v2i64 (zext GPRV2I32:$src)),
-(LCREATE_v2i64
-(IL_ASV2INT_v2i32 GPRV2I32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0))) >;
-
-
-def sitol_v2i64:Pat < (v2i64 (sext GPRV2I32:$src)),
-(LCREATE_v2i64
-(IL_ASV2INT_v2i32 GPRV2I32:$src),
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i32 GPRV2I32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0))),
- (VCREATE_v2i32 (LOADCONST_i32 31)))) >;
+def actosv2i16rr:Pat < (v2i16 (anyext GPRV2I8:$src)),
+(IL_ASV2SHORTv2i32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))) >;
+
+
+def uctosv2i16rr:Pat < (v2i16 (zext GPRV2I8:$src)),
+(IL_ASV2SHORTv2i32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))) >;
+
+
+def sctosv2i16rr:Pat < (v2i16 (sext GPRV2I8:$src)),
+(IL_ASV2SHORTv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))) >;
+
+
+def actoiv2i32rr:Pat < (v2i32 (anyext GPRV2I8:$src)),
+(IL_ASV2INTv2i32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))) >;
+
+
+def uctoiv2i32rr:Pat < (v2i32 (zext GPRV2I8:$src)),
+(IL_ASV2INTv2i32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))) >;
+
+
+def sctoiv2i32rr:Pat < (v2i32 (sext GPRV2I8:$src)),
+(IL_ASV2INTv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))) >;
+
+
+def actolv2i64rr:Pat < (v2i64 (anyext GPRV2I8:$src)),
+(LCREATEv2i64rr
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 0))) >;
+
+
+def uctolv2i64rr:Pat < (v2i64 (zext GPRV2I8:$src)),
+(LCREATEv2i64rr
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 0))) >;
+
+
+def sctolv2i64rr:Pat < (v2i64 (sext GPRV2I8:$src)),
+(LCREATEv2i64rr
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 31)))) >;
+
+
+def astoiv2i32rr:Pat < (v2i32 (anyext GPRV2I16:$src)),
+(IL_ASV2INTv2i32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))) >;
+
+
+def ustoiv2i32rr:Pat < (v2i32 (zext GPRV2I16:$src)),
+(IL_ASV2INTv2i32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))) >;
+
+
+def sstoiv2i32rr:Pat < (v2i32 (sext GPRV2I16:$src)),
+(IL_ASV2INTv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))) >;
+
+
+def astolv2i64rr:Pat < (v2i64 (anyext GPRV2I16:$src)),
+(LCREATEv2i64rr
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 0))) >;
+
+
+def ustolv2i64rr:Pat < (v2i64 (zext GPRV2I16:$src)),
+(LCREATEv2i64rr
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 0))) >;
+
+
+def sstolv2i64rr:Pat < (v2i64 (sext GPRV2I16:$src)),
+(LCREATEv2i64rr
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 31)))) >;
+
+
+def aitolv2i64rr:Pat < (v2i64 (anyext GPRV2I32:$src)),
+(LCREATEv2i64rr
+(IL_ASV2INTv2i32r GPRV2I32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0))) >;
+
+
+def uitolv2i64rr:Pat < (v2i64 (zext GPRV2I32:$src)),
+(LCREATEv2i64rr
+(IL_ASV2INTv2i32r GPRV2I32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0))) >;
+
+
+def sitolv2i64rr:Pat < (v2i64 (sext GPRV2I32:$src)),
+(LCREATEv2i64rr
+(IL_ASV2INTv2i32r GPRV2I32:$src),
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i32r GPRV2I32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0))),
+ (VCREATEv2i32r (LOADCONSTi32 31)))) >;
-def sctof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)),
+def sctofv2f32rr:Pat < (v2f32 (sint_to_fp GPRV2I8:$src)),
(v2f32
- (ITOF_v2f32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+ (ITOFv2f32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24))))) >;
-def uctof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
+def uctofv2f32rr:Pat < (v2f32 (uint_to_fp GPRV2I8:$src)),
(v2f32
- (UTOF_v2f32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24))))) >;
+ (UTOFv2f32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24))))) >;
-def ftosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F32:$src)),
+def ftoscv2i8rr:Pat < (v2i8 (fp_to_sint GPRV2F32:$src)),
(v2i8
- (IL_ASV2CHAR_v2i32
- (BINARY_AND_v2i32
-(FTOI_v2i32 GPRV2F32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+ (IL_ASV2CHARv2i32r
+ (ANDv2i32rr
+(FTOIv2i32r GPRV2F32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0x000000FF))))) >;
-def ftouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F32:$src)),
+def ftoucv2i8rr:Pat < (v2i8 (fp_to_uint GPRV2F32:$src)),
(v2i8
- (IL_ASV2CHAR_v2i32
- (BINARY_AND_v2i32
-(FTOU_v2i32 GPRV2F32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+ (IL_ASV2CHARv2i32r
+ (ANDv2i32rr
+(FTOUv2i32r GPRV2F32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0x000000FF))))) >;
-def sctod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)),
+def sctodv2f64rr:Pat < (v2f64 (sint_to_fp GPRV2I8:$src)),
(v2f64
- (VINSERT_v2f64
- (VCREATE_v2f64
- (FTOD
- (VEXTRACT_v2f32
- (ITOF_v2f32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ (VINSERTv2f64rr
+ (VCREATEv2f64r
+ (FTODr
+ (VEXTRACTv2f32r
+ (ITOFv2f32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))),
1)
)),
- (FTOD
- (VEXTRACT_v2f32
- (ITOF_v2f32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ (FTODr
+ (VEXTRACTv2f32r
+ (ITOFv2f32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))),
2)
), 1, 256)
) >;
-def uctod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
+def uctodv2f64rr:Pat < (v2f64 (uint_to_fp GPRV2I8:$src)),
(v2f64
- (VINSERT_v2f64
- (VCREATE_v2f64
- (FTOD
- (VEXTRACT_v2f32
- (UTOF_v2f32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ (VINSERTv2f64rr
+ (VCREATEv2f64r
+ (FTODr
+ (VEXTRACTv2f32r
+ (UTOFv2f32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))),
1)
)),
- (FTOD
- (VEXTRACT_v2f32
- (UTOF_v2f32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i8 GPRV2I8:$src),
- (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))),
+ (FTODr
+ (VEXTRACTv2f32r
+ (UTOFv2f32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i8r GPRV2I8:$src),
+ (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))),
2)
), 1, 256)
) >;
-def dtosc_v2i8:Pat < (v2i8 (fp_to_sint GPRV2F64:$src)),
+def dtoscv2i8rr:Pat < (v2i8 (fp_to_sint GPRV2F64:$src)),
(v2i8
- (IL_ASV2CHAR_v2i32
- (BINARY_AND_v2i32
-(FTOI_v2i32 (VINSERT_v2f32
- (VCREATE_v2f32
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
- (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+ (IL_ASV2CHARv2i32r
+ (ANDv2i32rr
+(FTOIv2i32r (VINSERTv2f32rr
+ (VCREATEv2f32r
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 1))),
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATEv2i32r (LOADCONSTi32 0x000000FF))))) >;
-def dtouc_v2i8:Pat < (v2i8 (fp_to_uint GPRV2F64:$src)),
+def dtoucv2i8rr:Pat < (v2i8 (fp_to_uint GPRV2F64:$src)),
(v2i8
- (IL_ASV2CHAR_v2i32
- (BINARY_AND_v2i32
-(FTOU_v2i32 (VINSERT_v2f32
- (VCREATE_v2f32
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
- (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))) >;
+ (IL_ASV2CHARv2i32r
+ (ANDv2i32rr
+(FTOUv2i32r (VINSERTv2f32rr
+ (VCREATEv2f32r
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 1))),
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATEv2i32r (LOADCONSTi32 0x000000FF))))) >;
-def sstof_v2f32:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)),
+def sstofv2f32rr:Pat < (v2f32 (sint_to_fp GPRV2I16:$src)),
(v2f32
- (ITOF_v2f32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+ (ITOFv2f32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16))))) >;
-def ustof_v2f32:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
+def ustofv2f32rr:Pat < (v2f32 (uint_to_fp GPRV2I16:$src)),
(v2f32
- (UTOF_v2f32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16))))) >;
+ (UTOFv2f32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16))))) >;
-def ftoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F32:$src)),
+def ftossv2i16rr:Pat < (v2i16 (fp_to_sint GPRV2F32:$src)),
(v2i16
- (IL_ASV2SHORT_v2i32
- (BINARY_AND_v2i32
-(FTOI_v2i32 GPRV2F32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+ (IL_ASV2SHORTv2i32r
+ (ANDv2i32rr
+(FTOIv2i32r GPRV2F32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0x0000FFFF))))) >;
-def ftous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F32:$src)),
+def ftousv2i16rr:Pat < (v2i16 (fp_to_uint GPRV2F32:$src)),
(v2i16
- (IL_ASV2SHORT_v2i32
- (BINARY_AND_v2i32
-(FTOU_v2i32 GPRV2F32:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+ (IL_ASV2SHORTv2i32r
+ (ANDv2i32rr
+(FTOUv2i32r GPRV2F32:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0x0000FFFF))))) >;
-def sstod_v2f64:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)),
+def sstodv2f64rr:Pat < (v2f64 (sint_to_fp GPRV2I16:$src)),
(v2f64
- (VINSERT_v2f64
- (VCREATE_v2f64
- (FTOD
- (VEXTRACT_v2f32
- (ITOF_v2f32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ (VINSERTv2f64rr
+ (VCREATEv2f64r
+ (FTODr
+ (VEXTRACTv2f32r
+ (ITOFv2f32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))),
1)
)),
- (FTOD
- (VEXTRACT_v2f32
- (ITOF_v2f32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ (FTODr
+ (VEXTRACTv2f32r
+ (ITOFv2f32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))),
2)
), 1, 256)
) >;
-def ustod_v2f64:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)),
+def ustodv2f64rr:Pat < (v2f64 (uint_to_fp GPRV2I16:$src)),
(v2f64
- (VINSERT_v2f64
- (VCREATE_v2f64
- (FTOD
- (VEXTRACT_v2f32
- (UTOF_v2f32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ (VINSERTv2f64rr
+ (VCREATEv2f64r
+ (FTODr
+ (VEXTRACTv2f32r
+ (UTOFv2f32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))),
1)
)),
- (FTOD
- (VEXTRACT_v2f32
- (UTOF_v2f32
- (USHRVEC_v2i32
- (SHLVEC_v2i32
-(IL_ASV2INT_v2i16 GPRV2I16:$src),
- (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))),
+ (FTODr
+ (VEXTRACTv2f32r
+ (UTOFv2f32r
+ (USHRv2i32i32rr
+ (SHLv2i32i32rr
+(IL_ASV2INTv2i16r GPRV2I16:$src),
+ (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))),
2)
), 1, 256)
) >;
-def dtoss_v2i16:Pat < (v2i16 (fp_to_sint GPRV2F64:$src)),
+def dtossv2i16rr:Pat < (v2i16 (fp_to_sint GPRV2F64:$src)),
(v2i16
- (IL_ASV2SHORT_v2i32
- (BINARY_AND_v2i32
-(FTOI_v2i32 (VINSERT_v2f32
- (VCREATE_v2f32
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
- (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
+ (IL_ASV2SHORTv2i32r
+ (ANDv2i32rr
+(FTOIv2i32r (VINSERTv2f32rr
+ (VCREATEv2f32r
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 1))),
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATEv2i32r (LOADCONSTi32 0x0000FFFF))))) >;
-def dtous_v2i16:Pat < (v2i16 (fp_to_uint GPRV2F64:$src)),
+def dtousv2i16rr:Pat < (v2i16 (fp_to_uint GPRV2F64:$src)),
(v2i16
- (IL_ASV2SHORT_v2i32
- (BINARY_AND_v2i32
-(FTOU_v2i32 (VINSERT_v2f32
- (VCREATE_v2f32
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 1))),
- (DTOF (VEXTRACT_v2f64 GPRV2F64:$src, 2)), 1, 256)),
- (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))) >;
-
-def stoc_v2i8:Pat < (v2i8 (trunc GPRV2I16:$src)),
-(IL_ASV2CHAR_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
- (IL_ASV2INT_v2i16
-(BINARY_AND_v2i16 GPRV2I16:$src,
- (VCREATE_v2i16 (LOADCONST_i16 0x000000FF))))
- , (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))
+ (IL_ASV2SHORTv2i32r
+ (ANDv2i32rr
+(FTOUv2i32r (VINSERTv2f32rr
+ (VCREATEv2f32r
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 1))),
+ (DTOFr (VEXTRACTv2f64r GPRV2F64:$src, 2)), 1, 256)),
+ (VCREATEv2i32r (LOADCONSTi32 0x0000FFFF))))) >;
+
+def stocv2i8rr:Pat < (v2i8 (trunc GPRV2I16:$src)),
+(IL_ASV2CHARv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+ (IL_ASV2INTv2i16r
+(ANDv2i16rr GPRV2I16:$src,
+ (VCREATEv2i16r (LOADCONSTi16 0x000000FF))))
+ , (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))
) >;
-def itoc_v2i8:Pat < (v2i8 (trunc GPRV2I32:$src)),
-(IL_ASV2CHAR_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
- (IL_ASV2INT_v2i32
-(BINARY_AND_v2i32 GPRV2I32:$src,
- (VCREATE_v2i32 (LOADCONST_i32 0x000000FF))))
- , (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))
+def itocv2i8rr:Pat < (v2i8 (trunc GPRV2I32:$src)),
+(IL_ASV2CHARv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+ (IL_ASV2INTv2i32r
+(ANDv2i32rr GPRV2I32:$src,
+ (VCREATEv2i32r (LOADCONSTi32 0x000000FF))))
+ , (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))
) >;
-def itos_v2i16:Pat < (v2i16 (trunc GPRV2I32:$src)),
-(IL_ASV2SHORT_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
- (IL_ASV2INT_v2i32
-(BINARY_AND_v2i32 GPRV2I32:$src,
- (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF))))
- , (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))
+def itosv2i16rr:Pat < (v2i16 (trunc GPRV2I32:$src)),
+(IL_ASV2SHORTv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+ (IL_ASV2INTv2i32r
+(ANDv2i32rr GPRV2I32:$src,
+ (VCREATEv2i32r (LOADCONSTi32 0x0000FFFF))))
+ , (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))
) >;
-def ltoc_v2i8:Pat < (v2i8 (trunc GPRV2I64:$src)),
-(IL_ASV2CHAR_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
- (BINARY_AND_v2i32
-(LLO_v2i64 GPRV2I64:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0x000000FF)))
- , (VCREATE_v2i32 (LOADCONST_i32 24))),
- (VCREATE_v2i32 (LOADCONST_i32 24)))
+def ltocv2i8rr:Pat < (v2i8 (trunc GPRV2I64:$src)),
+(IL_ASV2CHARv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+ (ANDv2i32rr
+(LLOv2i64r GPRV2I64:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0x000000FF)))
+ , (VCREATEv2i32r (LOADCONSTi32 24))),
+ (VCREATEv2i32r (LOADCONSTi32 24)))
) >;
-def ltos_v2i16:Pat < (v2i16 (trunc GPRV2I64:$src)),
-(IL_ASV2SHORT_v2i32
- (SHRVEC_v2i32
- (SHLVEC_v2i32
- (BINARY_AND_v2i32
-(LLO_v2i64 GPRV2I64:$src),
- (VCREATE_v2i32 (LOADCONST_i32 0x0000FFFF)))
- , (VCREATE_v2i32 (LOADCONST_i32 16))),
- (VCREATE_v2i32 (LOADCONST_i32 16)))
+def ltosv2i16rr:Pat < (v2i16 (trunc GPRV2I64:$src)),
+(IL_ASV2SHORTv2i32r
+ (SHRv2i32i32rr
+ (SHLv2i32i32rr
+ (ANDv2i32rr
+(LLOv2i64r GPRV2I64:$src),
+ (VCREATEv2i32r (LOADCONSTi32 0x0000FFFF)))
+ , (VCREATEv2i32r (LOADCONSTi32 16))),
+ (VCREATEv2i32r (LOADCONSTi32 16)))
) >;
-def ltoi_v2i32:Pat < (v2i32 (trunc GPRV2I64:$src)), (LLO_v2i64 GPRV2I64:$src)>;
+def ltoiv2i32rr:Pat < (v2i32 (trunc GPRV2I64:$src)), (LLOv2i64r GPRV2I64:$src)>;
-def actos_v4i16:Pat < (v4i16 (anyext GPRV4I8:$src)),
-(IL_ASV4SHORT_v4i32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+def actosv4i16rr:Pat < (v4i16 (anyext GPRV4I8:$src)),
+(IL_ASV4SHORTv4i32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))) >;
-def uctos_v4i16:Pat < (v4i16 (zext GPRV4I8:$src)),
-(IL_ASV4SHORT_v4i32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+def uctosv4i16rr:Pat < (v4i16 (zext GPRV4I8:$src)),
+(IL_ASV4SHORTv4i32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))) >;
-def sctos_v4i16:Pat < (v4i16 (sext GPRV4I8:$src)),
-(IL_ASV4SHORT_v4i32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+def sctosv4i16rr:Pat < (v4i16 (sext GPRV4I8:$src)),
+(IL_ASV4SHORTv4i32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))) >;
-def actoi_v4i32:Pat < (v4i32 (anyext GPRV4I8:$src)),
-(IL_ASV4INT_v4i32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+def actoiv4i32rr:Pat < (v4i32 (anyext GPRV4I8:$src)),
+(IL_ASV4INTv4i32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))) >;
-def uctoi_v4i32:Pat < (v4i32 (zext GPRV4I8:$src)),
-(IL_ASV4INT_v4i32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+def uctoiv4i32rr:Pat < (v4i32 (zext GPRV4I8:$src)),
+(IL_ASV4INTv4i32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))) >;
-def sctoi_v4i32:Pat < (v4i32 (sext GPRV4I8:$src)),
-(IL_ASV4INT_v4i32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))) >;
+def sctoiv4i32rr:Pat < (v4i32 (sext GPRV4I8:$src)),
+(IL_ASV4INTv4i32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))) >;
-def astoi_v4i32:Pat < (v4i32 (anyext GPRV4I16:$src)),
-(IL_ASV4INT_v4i32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i16 GPRV4I16:$src),
- (VCREATE_v4i32 (LOADCONST_i32 16))),
- (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+def astoiv4i32rr:Pat < (v4i32 (anyext GPRV4I16:$src)),
+(IL_ASV4INTv4i32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i16r GPRV4I16:$src),
+ (VCREATEv4i32r (LOADCONSTi32 16))),
+ (VCREATEv4i32r (LOADCONSTi32 16)))) >;
-def ustoi_v4i32:Pat < (v4i32 (zext GPRV4I16:$src)),
-(IL_ASV4INT_v4i32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i16 GPRV4I16:$src),
- (VCREATE_v4i32 (LOADCONST_i32 16))),
- (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+def ustoiv4i32rr:Pat < (v4i32 (zext GPRV4I16:$src)),
+(IL_ASV4INTv4i32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i16r GPRV4I16:$src),
+ (VCREATEv4i32r (LOADCONSTi32 16))),
+ (VCREATEv4i32r (LOADCONSTi32 16)))) >;
-def sstoi_v4i32:Pat < (v4i32 (sext GPRV4I16:$src)),
-(IL_ASV4INT_v4i32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i16 GPRV4I16:$src),
- (VCREATE_v4i32 (LOADCONST_i32 16))),
- (VCREATE_v4i32 (LOADCONST_i32 16)))) >;
+def sstoiv4i32rr:Pat < (v4i32 (sext GPRV4I16:$src)),
+(IL_ASV4INTv4i32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i16r GPRV4I16:$src),
+ (VCREATEv4i32r (LOADCONSTi32 16))),
+ (VCREATEv4i32r (LOADCONSTi32 16)))) >;
-def sctof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)),
+def sctofv4f32rr:Pat < (v4f32 (sint_to_fp GPRV4I8:$src)),
(v4f32
- (ITOF_v4f32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+ (ITOFv4f32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24))))) >;
-def uctof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
+def uctofv4f32rr:Pat < (v4f32 (uint_to_fp GPRV4I8:$src)),
(v4f32
- (UTOF_v4f32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i8 GPRV4I8:$src),
- (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24))))) >;
+ (UTOFv4f32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i8r GPRV4I8:$src),
+ (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24))))) >;
-def ftosc_v4i8:Pat < (v4i8 (fp_to_sint GPRV4F32:$src)),
+def ftoscv4i8rr:Pat < (v4i8 (fp_to_sint GPRV4F32:$src)),
(v4i8
- (IL_ASV4CHAR_v4i32
- (BINARY_AND_v4i32
-(FTOI_v4i32 GPRV4F32:$src),
- (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+ (IL_ASV4CHARv4i32r
+ (ANDv4i32rr
+(FTOIv4i32r GPRV4F32:$src),
+ (VCREATEv4i32r (LOADCONSTi32 0x000000FF))))) >;
-def ftouc_v4i8:Pat < (v4i8 (fp_to_uint GPRV4F32:$src)),
+def ftoucv4i8rr:Pat < (v4i8 (fp_to_uint GPRV4F32:$src)),
(v4i8
- (IL_ASV4CHAR_v4i32
- (BINARY_AND_v4i32
-(FTOU_v4i32 GPRV4F32:$src),
- (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))) >;
+ (IL_ASV4CHARv4i32r
+ (ANDv4i32rr
+(FTOUv4i32r GPRV4F32:$src),
+ (VCREATEv4i32r (LOADCONSTi32 0x000000FF))))) >;
-def sstof_v4f32:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)),
+def sstofv4f32rr:Pat < (v4f32 (sint_to_fp GPRV4I16:$src)),
(v4f32
- (ITOF_v4f32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i16 GPRV4I16:$src),
- (VCREATE_v4i32 (LOADCONST_i32 16))),
- (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+ (ITOFv4f32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i16r GPRV4I16:$src),
+ (VCREATEv4i32r (LOADCONSTi32 16))),
+ (VCREATEv4i32r (LOADCONSTi32 16))))) >;
-def ustof_v4f32:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)),
+def ustofv4f32rr:Pat < (v4f32 (uint_to_fp GPRV4I16:$src)),
(v4f32
- (UTOF_v4f32
- (USHRVEC_v4i32
- (SHLVEC_v4i32
-(IL_ASV4INT_v4i16 GPRV4I16:$src),
- (VCREATE_v4i32 (LOADCONST_i32 16))),
- (VCREATE_v4i32 (LOADCONST_i32 16))))) >;
+ (UTOFv4f32r
+ (USHRv4i32i32rr
+ (SHLv4i32i32rr
+(IL_ASV4INTv4i16r GPRV4I16:$src),
+ (VCREATEv4i32r (LOADCONSTi32 16))),
+ (VCREATEv4i32r (LOADCONSTi32 16))))) >;
-def ftoss_v4i16:Pat < (v4i16 (fp_to_sint GPRV4F32:$src)),
+def ftossv4i16rr:Pat < (v4i16 (fp_to_sint GPRV4F32:$src)),
(v4i16
- (IL_ASV4SHORT_v4i32
- (BINARY_AND_v4i32
-(FTOI_v4i32 GPRV4F32:$src),
- (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+ (IL_ASV4SHORTv4i32r
+ (ANDv4i32rr
+(FTOIv4i32r GPRV4F32:$src),
+ (VCREATEv4i32r (LOADCONSTi32 0x0000FFFF))))) >;
-def ftous_v4i16:Pat < (v4i16 (fp_to_uint GPRV4F32:$src)),
+def ftousv4i16rr:Pat < (v4i16 (fp_to_uint GPRV4F32:$src)),
(v4i16
- (IL_ASV4SHORT_v4i32
- (BINARY_AND_v4i32
-(FTOU_v4i32 GPRV4F32:$src),
- (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))) >;
+ (IL_ASV4SHORTv4i32r
+ (ANDv4i32rr
+(FTOUv4i32r GPRV4F32:$src),
+ (VCREATEv4i32r (LOADCONSTi32 0x0000FFFF))))) >;
-def stoc_v4i8:Pat < (v4i8 (trunc GPRV4I16:$src)),
-(IL_ASV4CHAR_v4i32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
- (IL_ASV4INT_v4i16
-(BINARY_AND_v4i16 GPRV4I16:$src,
- (VCREATE_v4i16 (LOADCONST_i16 0x000000FF))))
- , (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))
+def stocv4i8rr:Pat < (v4i8 (trunc GPRV4I16:$src)),
+(IL_ASV4CHARv4i32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+ (IL_ASV4INTv4i16r
+(ANDv4i16rr GPRV4I16:$src,
+ (VCREATEv4i16r (LOADCONSTi16 0x000000FF))))
+ , (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))
) >;
-def itoc_v4i8:Pat < (v4i8 (trunc GPRV4I32:$src)),
-(IL_ASV4CHAR_v4i32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
- (IL_ASV4INT_v4i32
-(BINARY_AND_v4i32 GPRV4I32:$src,
- (VCREATE_v4i32 (LOADCONST_i32 0x000000FF))))
- , (VCREATE_v4i32 (LOADCONST_i32 24))),
- (VCREATE_v4i32 (LOADCONST_i32 24)))
+def itocv4i8rr:Pat < (v4i8 (trunc GPRV4I32:$src)),
+(IL_ASV4CHARv4i32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+ (IL_ASV4INTv4i32r
+(ANDv4i32rr GPRV4I32:$src,
+ (VCREATEv4i32r (LOADCONSTi32 0x000000FF))))
+ , (VCREATEv4i32r (LOADCONSTi32 24))),
+ (VCREATEv4i32r (LOADCONSTi32 24)))
) >;
-def itos_v4i16:Pat < (v4i16 (trunc GPRV4I32:$src)),
-(IL_ASV4SHORT_v4i32
- (SHRVEC_v4i32
- (SHLVEC_v4i32
- (IL_ASV4INT_v4i32
-(BINARY_AND_v4i32 GPRV4I32:$src,
- (VCREATE_v4i32 (LOADCONST_i32 0x0000FFFF))))
- , (VCREATE_v4i32 (LOADCONST_i32 16))),
- (VCREATE_v4i32 (LOADCONST_i32 16)))
+def itosv4i16rr:Pat < (v4i16 (trunc GPRV4I32:$src)),
+(IL_ASV4SHORTv4i32r
+ (SHRv4i32i32rr
+ (SHLv4i32i32rr
+ (IL_ASV4INTv4i32r
+(ANDv4i32rr GPRV4I32:$src,
+ (VCREATEv4i32r (LOADCONSTi32 0x0000FFFF))))
+ , (VCREATEv4i32r (LOADCONSTi32 16))),
+ (VCREATEv4i32r (LOADCONSTi32 16)))
) >;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.cpp Wed Sep 12 12:43:34 2012
@@ -22,24 +22,20 @@
setCaps();
mDeviceFlag = OCL_DEVICE_ALL;
}
-
AMDILDevice::~AMDILDevice()
{
mHWBits.clear();
mSWBits.clear();
}
-
size_t AMDILDevice::getMaxGDSSize() const
{
return 0;
}
-
uint32_t
AMDILDevice::getDeviceFlag() const
{
return mDeviceFlag;
}
-
size_t AMDILDevice::getMaxNumCBs() const
{
if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
@@ -48,7 +44,6 @@
return 0;
}
-
size_t AMDILDevice::getMaxCBSize() const
{
if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
@@ -57,17 +52,14 @@
return 0;
}
-
size_t AMDILDevice::getMaxScratchSize() const
{
return 65536;
}
-
uint32_t AMDILDevice::getStackAlignment() const
{
return 16;
}
-
void AMDILDevice::setCaps()
{
mSWBits.set(AMDILDeviceInfo::HalfOps);
@@ -104,7 +96,6 @@
mSWBits.set(AMDILDeviceInfo::ByteGDSOps);
mSWBits.set(AMDILDeviceInfo::LongOps);
}
-
AMDILDeviceInfo::ExecutionMode
AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
{
@@ -119,24 +110,19 @@
}
return AMDILDeviceInfo::Unsupported;
-
}
-
bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
{
return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
}
-
bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
{
return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
}
-
bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
{
return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
}
-
std::string
AMDILDevice::getDataLayout() const
{
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevice.h Wed Sep 12 12:43:34 2012
@@ -16,8 +16,7 @@
#include "AMDIL.h"
#include "AMDILLLVMPC.h"
#include "llvm/ADT/BitVector.h"
-namespace llvm
-{
+namespace llvm {
class AMDILSubtarget;
class AMDILAsmPrinter;
class AMDILIOExpansion;
@@ -25,8 +24,7 @@
//===----------------------------------------------------------------------===//
// Interface for data that is specific to a single device
//===----------------------------------------------------------------------===//
-class AMDILDevice
-{
+class AMDILDevice {
public:
AMDILDevice(AMDILSubtarget *ST);
virtual ~AMDILDevice();
@@ -83,8 +81,7 @@
virtual uint32_t getMaxNumUAVs() const = 0;
// Interface to get the IO Expansion pass for each device.
- virtual FunctionPass*
- getIOExpansion(TargetMachine&, CodeGenOpt::Level) const = 0;
+ virtual FunctionPass* getIOExpansion() const = 0;
// Interface to get the Asm printer for each device.
virtual AsmPrinter*
@@ -94,7 +91,6 @@
virtual FunctionPass*
getPointerManager(TargetMachine&, CodeGenOpt::Level) const = 0;
-
// API utilizing more detailed capabilities of each family of
// cards. If a capability is supported, then either usesHardware or
// usesSoftware returned true. If usesHardware returned true, then
@@ -125,6 +121,5 @@
AMDILDeviceInfo::ExecutionMode
getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
}; // AMDILDevice
-
} // namespace llvm
#endif // _AMDILDEVICEIMPL_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.cpp Wed Sep 12 12:43:34 2012
@@ -15,10 +15,12 @@
#include "AMDILSubtarget.h"
#include <string>
using namespace llvm;
-namespace llvm
-{
+namespace llvm {
AMDILDevice*
-getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+getDeviceFromName(const std::string &deviceName,
+ AMDILSubtarget *ptr,
+ bool is64bit,
+ bool is64on32bit)
{
if (deviceName.c_str()[2] == '7') {
switch (deviceName.c_str()[3]) {
@@ -88,33 +90,18 @@
#endif
return new AMDILNIDevice(ptr);
} else if (deviceName == "tahiti") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
if (is64bit) {
return new AMDILSIDevice64(ptr);
} else {
return new AMDILSIDevice32(ptr);
}
} else if (deviceName == "pitcairn") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
if (is64bit) {
return new AMDILSIDevice64(ptr);
} else {
return new AMDILSIDevice32(ptr);
}
} else if (deviceName == "capeverde") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
if (is64bit) {
return new AMDILSIDevice64(ptr);
} else {
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDeviceInfo.h Wed Sep 12 12:43:34 2012
@@ -24,50 +24,50 @@
// emulated with a sequence of software instructions, or not
// supported at all.
enum ExecutionMode {
- Unsupported = 0, // Unsupported feature on the card(Default value)
- Software, // This is the execution mode that is set if the
+ Unsupported = 0, // Unsupported feature on the card(Default value)
+ Software, // This is the execution mode that is set if the
// feature is emulated in software
- Hardware // This execution mode is set if the feature exists
+ Hardware // This execution mode is set if the feature exists
// natively in hardware
};
// Any changes to this needs to have a corresponding update to the
// twiki page GPUMetadataABI
enum Caps {
- HalfOps = 0x1, // Half float is supported or not.
- DoubleOps = 0x2, // Double is supported or not.
- ByteOps = 0x3, // Byte(char) is support or not.
- ShortOps = 0x4, // Short is supported or not.
- LongOps = 0x5, // Long is supported or not.
- Images = 0x6, // Images are supported or not.
- ByteStores = 0x7, // ByteStores available(!HD4XXX).
- ConstantMem = 0x8, // Constant/CB memory.
- LocalMem = 0x9, // Local/LDS memory.
- PrivateMem = 0xA, // Scratch/Private/Stack memory.
- RegionMem = 0xB, // OCL GDS Memory Extension.
- FMA = 0xC, // Use HW FMA or SW FMA.
- ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
- MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
- PPAMode = 0xF, // UAV Per Pointer Allocation Mode capability
- NoAlias = 0x10, // Cached loads.
- Signed24BitOps = 0x11, // Peephole Optimization.
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+  ByteOps        = 0x3,   // Byte(char) is supported or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ PPAMode = 0xF, // UAV Per Pointer Allocation Mode capability
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
// Debug mode implies that no hardware features or optimizations
// are performed and that all memory accesses go through a single
// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
- Debug = 0x12, // Debug mode is enabled.
- CachedMem = 0x13, // Cached mem is available or not.
- BarrierDetect = 0x14, // Detect duplicate barriers.
- Semaphore = 0x15, // Flag to specify that semaphores are supported.
- ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
- ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
- TmrReg = 0x18, // Flag to specify if Tmr register is supported.
- NoInline = 0x19, // Flag to specify that no inlining should occur.
- MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
- HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
- ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
- PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
- ByteGDSOps = 0x1F, // Flag to specify if byte GDS ops are available.
- FlatMem = 0x20, // Flag to specify if device supports flat addressing.
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Semaphore = 0x15, // Flag to specify that semaphores are supported.
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ ByteGDSOps = 0x1F, // Flag to specify if byte GDS ops are available.
+ FlatMem = 0x20, // Flag to specify if device supports flat addressing.
// If more capabilities are required, then
// this number needs to be increased.
// All capabilities must come before this
@@ -77,18 +77,19 @@
// These have to be in order with the older generations
// having the lower number enumerations.
enum Generation {
- HD4XXX = 0, // 7XX based devices.
- HD5XXX, // Evergreen based devices.
- HD6XXX, // NI/Evergreen+ based devices.
- HD7XXX, // SI based devices.
- HD8XXX, // CI based devices.
- HDTEST, // Experimental feature testing device.
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HD7XXX, // SI based devices.
+ HD8XXX, // CI based devices.
+ HDTEST, // Experimental feature testing device.
HDNUMGEN
};
-
-
-} // namespace AMDILDeviceInfo
+} // namespace AMDILDeviceInfo
llvm::AMDILDevice*
-getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+getDeviceFromName(const std::string &name,
+ llvm::AMDILSubtarget *ptr,
+ bool is64bit = false,
+ bool is64on32bit = false);
} // namespace llvm
#endif // _AMDILDEVICEINFO_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILDevices.h Wed Sep 12 12:43:34 2012
@@ -14,7 +14,6 @@
#ifndef __AMDIL_DEVICES_H_
#define __AMDIL_DEVICES_H_
// Include all of the device specific header files
-// This file is for Internal use only!
#include "AMDILDevice.h"
#include "AMDIL7XXDevice.h"
#include "AMDILEvergreenDevice.h"
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.cpp Wed Sep 12 12:43:34 2012
@@ -37,13 +37,11 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-
// TODO: Add support for verbose.
AMDILEGAsmPrinter::AMDILEGAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS)
: AMDILAsmPrinter(ASM_PRINTER_ARGUMENTS)
{
}
-
AMDILEGAsmPrinter::~AMDILEGAsmPrinter()
{
}
@@ -82,7 +80,6 @@
}
emitMCallInst(MI, O, name);
}
-
bool
AMDILEGAsmPrinter::runOnMachineFunction(MachineFunction &lMF)
{
@@ -108,7 +105,7 @@
formatted_raw_ostream O(OFunStr);
const AMDILSubtarget *curTarget = mTM->getSubtargetImpl();
if (mDebugMode) {
- O << ";" ;
+ O << ";";
II->print(O);
}
if (isMacroFunc(II)) {
@@ -166,7 +163,7 @@
} else {
printOperand(II, x
, O
- );
+ );
}
if (!x) {
O << "), (";
@@ -201,7 +198,6 @@
mMFI->addCalledIntr(macronum);
}
} else {
-
// Print the assembly for the instruction.
// We want to make sure that we do HW constants
// before we do arena segment
@@ -222,6 +218,8 @@
}
O << "\tendif\n";
mMFI->addMetadata(";memory:compilerwrite");
+ } else if (II->getOpcode() == AMDIL::COPY) {
+ printCopy(II, O);
} else {
printInstruction(II, O);
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGAsmPrinter.h Wed Sep 12 12:43:34 2012
@@ -55,7 +55,6 @@
//
virtual void
emitMacroFunc(const MachineInstr *MI, OSTREAM_TYPE &O);
-
-}; // AMDILEGAsmPrinter
+}; // AMDILEGAsmPrinter
} // end of llvm namespace
#endif // _AMDIL_EG_ASM_PRINTER_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEGIOExpansion.cpp Wed Sep 12 12:43:34 2012
@@ -28,131 +28,58 @@
#include "llvm/Support/DebugLoc.h"
#include <cstdio>
using namespace llvm;
-AMDILEGIOExpansion::AMDILEGIOExpansion(TargetMachine &tm,
- CodeGenOpt::Level OptLevel) : AMDILImageExpansion(tm, OptLevel)
+
+namespace llvm
{
+extern void initializeAMDILEGIOExpansionPass(llvm::PassRegistry&);
}
-AMDILEGIOExpansion::~AMDILEGIOExpansion()
+char AMDILEGIOExpansion::ID = 0;
+INITIALIZE_PASS(AMDILEGIOExpansion, "eg-io-expansion",
+ "AMDIL EG/NI IO Expansion", false, false);
+
+AMDILEGIOExpansion::AMDILEGIOExpansion()
+ : MachineFunctionPass(ID)
{
+ initializeAMDILEGIOExpansionPass(*PassRegistry::getPassRegistry());
}
const char *AMDILEGIOExpansion::getPassName() const
{
return "AMDIL EG/NI IO Expansion Pass";
}
-bool
-AMDILEGIOExpansion::isImageIO(MachineInstr *MI)
+bool AMDILEGIOExpansion::runOnMachineFunction(MachineFunction& MF)
{
- if (!MI->getOperand(0).isGlobal()) {
- return false;
- }
- const llvm::StringRef& nameRef = MI->getOperand(0).getGlobal()->getName();
- const char *name = nameRef.data();
- if (nameRef.size() > 8 && !strncmp(name, "__amdil_", 8)) {
- name += 8;
- if (!strncmp(name, "sample_data", 11)
- || !strncmp(name, "write_image", 11)
- || !strncmp(name, "get_image", 9)
- ) {
- return true;
- }
- }
- return false;
+ AMDILEGIOExpansionImpl impl(MF);
+ return impl.run();
}
bool
-AMDILEGIOExpansion::isIOInstruction(MachineInstr *MI)
+AMDILEGIOExpansionImpl::isIOInstruction(MachineInstr *MI)
{
if (!MI) {
return false;
}
- switch (MI->getOpcode()) {
- default:
- return AMDILIOExpansion::isIOInstruction(MI);
- case AMDIL::IMAGE1D_READ:
- case AMDIL::IMAGE1D_READ_UNNORM:
- case AMDIL::IMAGE1D_WRITE:
- case AMDIL::IMAGE1D_INFO0:
- case AMDIL::IMAGE1D_INFO1:
- case AMDIL::IMAGE1DA_READ:
- case AMDIL::IMAGE1DA_READ_UNNORM:
- case AMDIL::IMAGE1DA_WRITE:
- case AMDIL::IMAGE1DA_INFO0:
- case AMDIL::IMAGE1DA_INFO1:
- case AMDIL::IMAGE1DB_TXLD:
- case AMDIL::IMAGE1DB_READ:
- case AMDIL::IMAGE1DB_READ_UNNORM:
- case AMDIL::IMAGE1DB_WRITE:
- case AMDIL::IMAGE1DB_INFO0:
- case AMDIL::IMAGE1DB_INFO1:
- case AMDIL::IMAGE2D_READ:
- case AMDIL::IMAGE2D_READ_UNNORM:
- case AMDIL::IMAGE2D_WRITE:
- case AMDIL::IMAGE2D_INFO0:
- case AMDIL::IMAGE2D_INFO1:
- case AMDIL::IMAGE2DA_READ:
- case AMDIL::IMAGE2DA_READ_UNNORM:
- case AMDIL::IMAGE2DA_WRITE:
- case AMDIL::IMAGE2DA_INFO0:
- case AMDIL::IMAGE2DA_INFO1:
- case AMDIL::IMAGE3D_READ:
- case AMDIL::IMAGE3D_READ_UNNORM:
- case AMDIL::IMAGE3D_WRITE:
- case AMDIL::IMAGE3D_INFO0:
- case AMDIL::IMAGE3D_INFO1:
+ if (isImageInst(MI)) {
return true;
- };
- return false;
+ }
+ return AMDILIOExpansionImpl::isIOInstruction(MI);
}
void
-AMDILEGIOExpansion::expandIOInstruction(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandIOInstruction(MachineInstr *MI)
{
assert(isIOInstruction(MI) && "Must be an IO instruction to "
"be passed to this function!");
- switch (MI->getOpcode()) {
- default:
- AMDILIOExpansion::expandIOInstruction(MI);
- break;
- case AMDIL::IMAGE1D_READ:
- case AMDIL::IMAGE1DA_READ:
- case AMDIL::IMAGE1DB_TXLD:
- case AMDIL::IMAGE1DB_READ:
- case AMDIL::IMAGE2D_READ:
- case AMDIL::IMAGE2DA_READ:
- case AMDIL::IMAGE3D_READ:
- case AMDIL::IMAGE1D_READ_UNNORM:
- case AMDIL::IMAGE1DA_READ_UNNORM:
- case AMDIL::IMAGE1DB_READ_UNNORM:
- case AMDIL::IMAGE2D_READ_UNNORM:
- case AMDIL::IMAGE2DA_READ_UNNORM:
- case AMDIL::IMAGE3D_READ_UNNORM:
+ if (isReadImageInst(MI) || isImageTXLDInst(MI)) {
expandImageLoad(mBB, MI);
- break;
- case AMDIL::IMAGE1D_WRITE:
- case AMDIL::IMAGE1DA_WRITE:
- case AMDIL::IMAGE1DB_WRITE:
- case AMDIL::IMAGE2D_WRITE:
- case AMDIL::IMAGE2DA_WRITE:
- case AMDIL::IMAGE3D_WRITE:
+ } else if (isWriteImageInst(MI)) {
expandImageStore(mBB, MI);
- break;
- case AMDIL::IMAGE1D_INFO0:
- case AMDIL::IMAGE1D_INFO1:
- case AMDIL::IMAGE1DA_INFO0:
- case AMDIL::IMAGE1DA_INFO1:
- case AMDIL::IMAGE1DB_INFO0:
- case AMDIL::IMAGE1DB_INFO1:
- case AMDIL::IMAGE2D_INFO0:
- case AMDIL::IMAGE2D_INFO1:
- case AMDIL::IMAGE2DA_INFO0:
- case AMDIL::IMAGE2DA_INFO1:
- case AMDIL::IMAGE3D_INFO0:
- case AMDIL::IMAGE3D_INFO1:
+ } else if (isImageInfoInst(MI)) {
expandImageParam(mBB, MI);
- break;
- };
+ } else {
+ AMDILIOExpansionImpl::expandIOInstruction(MI);
+ }
}
bool
-AMDILEGIOExpansion::isCacheableOp(MachineInstr *MI)
+AMDILEGIOExpansionImpl::isCacheableOp(MachineInstr *MI)
{
AMDILAS::InstrResEnc curRes;
getAsmPrinterFlags(MI, curRes);
@@ -164,7 +91,7 @@
}
}
bool
-AMDILEGIOExpansion::isArenaOp(MachineInstr *MI)
+AMDILEGIOExpansionImpl::isArenaOp(MachineInstr *MI)
{
AMDILAS::InstrResEnc curRes;
getAsmPrinterFlags(MI, curRes);
@@ -173,75 +100,79 @@
|| curRes.bits.ResourceID >= ARENA_SEGMENT_RESERVED_UAVS;
}
void
-AMDILEGIOExpansion::expandPackedData(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandPackedData(MachineInstr *MI, uint32_t &dataReg)
{
- if (!isPackedData(MI)) {
- return;
- }
- // There is a bug in the CAL compiler that incorrectly
- // errors when the UBIT_INSERT instruction is used.
- if (mSTM->calVersion() < CAL_VERSION_SC_137) {
- AMDIL789IOExpansion::expandPackedData(MI);
+ if (!isPackedInst(MI)) {
return;
}
DebugLoc DL = MI->getDebugLoc();
+ uint32_t packedReg = getPackedReg(dataReg, getPackedID(MI));
// If we have packed data, then the shift size is no longer
// the same as the load size and we need to adjust accordingly
switch(getPackedID(MI)) {
default:
break;
- case PACK_V2I8: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
-
+ case PACK_V2I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERTi32rrrr), AMDIL::Rx1011)
.addImm(mMFI->addi32Literal(8)).addImm(mMFI->addi32Literal(8))
- .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
- }
- break;
- case PACK_V4I8: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1012)
-
- .addReg(AMDIL::R1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
-
- .addReg(AMDIL::R1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32),
+ .addReg(getCompReg(dataReg, sub_y_comp, sub_w_comp))
+ .addReg(getCompReg(dataReg, sub_x_comp, sub_z_comp));
+ dataReg = AMDIL::Rx1011;
+ break;
+ case PACK_V4I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHIv2i64r), AMDIL::Rxy1012)
+ .addReg(dataReg);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1011)
+ .addReg(dataReg);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERTv2i32rrrr),
AMDIL::Rxy1011)
-
.addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
.addImm(mMFI->addi64Literal(8ULL | (8ULL << 32)))
.addReg(AMDIL::Rxy1012).addReg(AMDIL::Rxy1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERTi32rrrr), AMDIL::Rx1011)
.addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
- }
- break;
- case PACK_V2I16: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_i32), AMDIL::Rx1011)
-
+ dataReg = AMDIL::Rx1011;
+ break;
+ case PACK_V2I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERTi32rrrr), AMDIL::Rx1011)
.addImm(mMFI->addi32Literal(16)).addImm(mMFI->addi32Literal(16))
- .addReg(AMDIL::Ry1011).addReg(AMDIL::Rx1011);
- }
- break;
- case PACK_V4I16: {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHI_v2i64), AMDIL::Rxy1012)
-
- .addReg(AMDIL::R1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
-
- .addReg(AMDIL::R1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_INSERT_v2i32), AMDIL::Rxy1011)
-
+ .addReg(getCompReg(dataReg, sub_y_comp, sub_w_comp))
+ .addReg(getCompReg(dataReg, sub_x_comp, sub_z_comp));
+ dataReg = AMDIL::Rx1011;
+ break;
+ case PACK_V4I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LHIv2i64r), AMDIL::Rxy1012)
+ .addReg(dataReg);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1011)
+ .addReg(dataReg);
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UBIT_INSERTv2i32rrrr), AMDIL::Rxy1011)
.addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
.addImm(mMFI->addi64Literal(16ULL | (16ULL << 32)))
.addReg(AMDIL::Rxy1012).addReg(AMDIL::Rxy1011);
- }
- break;
+ dataReg = AMDIL::Rxy1011;
+ break;
case UNPACK_V2I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_EXTRACTv2i32rrr), dataReg)
+ .addImm(mMFI->addi32Literal(8))
+ .addImm(mMFI->addi64Literal(8ULL << 32))
+ .addReg(packedReg);
+ break;
case UNPACK_V4I8:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_EXTRACTv4i32rrr), dataReg)
+ .addImm(mMFI->addi32Literal(8))
+ .addImm(mMFI->addi128Literal(8ULL << 32, (16ULL | (24ULL << 32))))
+ .addReg(packedReg);
+ break;
case UNPACK_V2I16:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UBIT_EXTRACTv2i32rrr), dataReg)
+ .addImm(mMFI->addi32Literal(16))
+ .addImm(mMFI->addi64Literal(16ULL << 32))
+ .addReg(packedReg);
+ break;
case UNPACK_V4I16:
- AMDIL789IOExpansion::expandPackedData(MI);
+ AMDIL789IOExpansionImpl::expandPackedData(MI, dataReg);
break;
};
}
@@ -254,223 +185,218 @@
}
return true;
}
-
void
-AMDILEGIOExpansion::expandGlobalLoad(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandGlobalLoad(MachineInstr *MI)
{
bool usesArena = isArenaOp(MI);
bool cacheable = isCacheableOp(MI);
- bool aligned = mSTM->calVersion() >= CAL_CACHED_ALIGNED_UAVS
- && isAlignedInst(MI);
+ bool aligned = isAlignedInst(MI);
uint32_t ID = getPointerID(MI);
mKM->setOutputInst();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions are generated before the current MI.
- expandLoadStartCode(MI);
- expandArenaSetup(MI);
+ expandLoadStartCode(MI, addyReg);
+ expandArenaSetup(MI, addyReg);
DebugLoc DL = MI->getDebugLoc();
if (getMemorySize(MI) == 1) {
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i8), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi8), dataReg)
+ .addReg(addyReg)
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IEQ_v4i32), AMDIL::R1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::EQv4i32rr), AMDIL::R1012)
.addReg(AMDIL::R1008)
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1012)
.addImm(mMFI->addi32Literal(0))
.addImm(mMFI->addi32Literal(24));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Ry1012)
.addImm(mMFI->addi32Literal(8))
.addReg(AMDIL::Rx1008);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rz1012)
.addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Rx1008);
if (cacheable) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADCACHEDi32),
AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
-
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADi32),
AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
-
}
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_v4i8), AMDIL::R1011)
- .addReg(AMDIL::Rx1011)
- .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACTi32rrr), dataReg)
+ .addImm(mMFI->addi32Literal(8))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1011);
}
} else if (getMemorySize(MI) == 2) {
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i16), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi16), dataReg)
+ .addReg(addyReg)
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(0));
if (cacheable) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADCACHEDi32),
AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
-
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADi32),
AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
-
}
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i16), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
- .addReg(AMDIL::Rx1008);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACTi32rrr), dataReg)
+ .addImm(mMFI->addi32Literal(16))
+ .addReg(AMDIL::Rx1008)
+ .addReg(AMDIL::Rx1011);
}
} else if (getMemorySize(MI) == 4) {
+ uint32_t opc = AMDIL::UAVRAW32LOADi32;
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
- .addImm(ID);
- } else {
- if (cacheable) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_i32),
- AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
-
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_i32),
- AMDIL::Rx1011).addReg(AMDIL::Rx1010).addImm(ID);
-
- }
+ opc = AMDIL::UAVARENA32LOADi32;
+ } else if (cacheable) {
+ opc = AMDIL::UAVRAW32LOADCACHEDi32;
}
+ BuildMI(*mBB, MI, DL, mTII->get(opc), getPackedReg(dataReg, getPackedID(MI)))
+ .addReg(addyReg)
+ .addImm(ID);
} else if (getMemorySize(MI) == 8) {
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi32),
+ getCompReg(dataReg, sub_x_comp))
+ .addReg(getCompReg(addyReg, sub_x_comp))
.addImm(ID);
if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Ry1011)
- .addReg(AMDIL::Ry1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi32),
+ getCompReg(dataReg, sub_y_comp))
+ .addReg(getCompReg(addyReg, sub_y_comp))
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
-
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(2);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32LOADi32), AMDIL::Rx1008)
.addReg(AMDIL::Rx1007)
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEi64rr), dataReg)
+ .addReg(getCompReg(dataReg, sub_x_comp))
.addReg(AMDIL::Rx1008);
}
} else {
if (cacheable) {
if (aligned) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHEDALIGNED_v2i32),
- AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVRAW32LOADCACHEDALIGNEDv2i32), AMDIL::Rxy1011)
+ .addReg(addyReg).addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v2i32),
- AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVRAW32LOADCACHEDv2i32), AMDIL::Rxy1011)
+ .addReg(addyReg).addImm(ID);
}
-
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v2i32),
- AMDIL::Rxy1011).addReg(AMDIL::Rx1010).addImm(ID);
-
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVRAW32LOADv2i32), AMDIL::Rxy1011)
+ .addReg(addyReg).addImm(ID);
}
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::COPY),
+ getPackedReg(dataReg, getPackedID(MI)))
+ .addReg(AMDIL::Rxy1011);
}
} else {
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi32),
+ getCompReg(dataReg, sub_x_comp))
+ .addReg(getCompReg(addyReg, sub_x_comp))
.addImm(ID);
if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Ry1011)
- .addReg(AMDIL::Ry1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi32),
+ getCompReg(dataReg, sub_y_comp))
+ .addReg(getCompReg(addyReg, sub_y_comp))
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rz1011)
- .addReg(AMDIL::Rz1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi32),
+ getCompReg(dataReg, sub_z_comp))
+ .addReg(getCompReg(addyReg, sub_z_comp))
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rw1011)
- .addReg(AMDIL::Rw1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32LOADi32),
+ getCompReg(dataReg, sub_w_comp))
+ .addReg(getCompReg(addyReg, sub_w_comp))
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(2);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32LOADi32), AMDIL::Rx1008)
.addReg(AMDIL::Rx1007)
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEi64rr),
+ getCompReg(dataReg, sub_xy_comp))
+ .addReg(getCompReg(dataReg, sub_x_comp))
.addReg(AMDIL::Rx1008);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(3);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32LOADi32), AMDIL::Rx1008)
.addReg(AMDIL::Rx1007)
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
-
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(4);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENALOAD_i32), AMDIL::Rx1006)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32LOADi32), AMDIL::Rx1006)
.addReg(AMDIL::Rx1007)
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rzw1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEi64rr),
+ getCompReg(dataReg, sub_zw_comp))
.addReg(AMDIL::Rx1006)
.addReg(AMDIL::Rx1008);
}
} else {
if (cacheable) {
if (aligned) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHEDALIGNED_v4i32),
- AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADCACHEDALIGNEDv4i32),
+ dataReg).addReg(addyReg).addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOADCACHED_v4i32),
- AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADCACHEDv4i32),
+ dataReg).addReg(addyReg).addImm(ID);
}
-
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWLOAD_v4i32),
- AMDIL::R1011).addReg(AMDIL::Rx1010).addImm(ID);
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32LOADv4i32),
+ dataReg).addReg(addyReg).addImm(ID);
}
}
}
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
-
void
-AMDILEGIOExpansion::expandRegionLoad(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandRegionLoad(MachineInstr *MI)
{
bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
if (!mSTM->device()->isSupported(AMDILDeviceInfo::RegionMem)) {
@@ -492,143 +418,123 @@
gID = mSTM->device()->getResourceID(AMDILDevice::GDS_ID);
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
- unsigned dstReg = AMDIL::R1011;
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions are generated before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
- .addReg(AMDIL::Rx1010)
- .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1010)
+ .addReg(addyReg)
+ .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_x_comp))
+ .addReg(AMDIL::Rx1010)
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_y_comp))
.addReg(AMDIL::Ry1010)
-
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rz1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_z_comp))
.addReg(AMDIL::Rz1010)
-
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rw1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_w_comp))
.addReg(AMDIL::Rw1010)
-
.addImm(gID);
break;
case 1:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
- ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ ? AMDIL::UMULi32rr : AMDIL::UMUL24i32rr;
BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r), dataReg)
.addReg(AMDIL::Rx1010)
-
.addImm(gID);
- // The instruction would normally fit in right here so everything created
- // after this point needs to go into the afterInst vector.
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACTi32rrr), dataReg)
.addImm(mMFI->addi32Literal(8))
.addReg(AMDIL::Rx1008)
- .addReg(AMDIL::Rx1011);
- dstReg = AMDIL::Rx1011;
+ .addReg(dataReg);
} else {
if (isSWSExtLoadInst(MI)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_i8), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi8r), dataReg)
+ .addReg(addyReg)
.addImm(gID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_u8), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADu8r), dataReg)
+ .addReg(addyReg)
.addImm(gID);
}
}
break;
case 2:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
- ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ ? AMDIL::UMULi32rr : AMDIL::UMUL24i32rr;
BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r), dataReg)
.addReg(AMDIL::Rx1010)
-
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACTi32rrr), dataReg)
.addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Rx1008)
- .addReg(AMDIL::Rx1011);
+ .addReg(dataReg);
} else {
if (isSWSExtLoadInst(MI)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_i16), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi16r), dataReg)
+ .addReg(addyReg)
.addImm(gID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD_u16), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADu16r), dataReg)
+ .addReg(addyReg)
.addImm(gID);
}
}
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getPackedReg(dataReg, getPackedID(MI)))
+ .addReg(addyReg)
.addImm(gID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
- .addReg(AMDIL::Rx1010)
-
- .addImm(mMFI->addi64Literal(1ULL << 32));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv2i32rr), AMDIL::Rxy1010)
+ .addReg(addyReg)
+ .addImm(mMFI->addi64Literal(4ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_x_comp))
+ .addReg(addyReg)
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSLOAD), AMDIL::Ry1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32LOADi32r),
+ getCompReg(dataReg, sub_y_comp))
.addReg(AMDIL::Ry1010)
-
.addImm(gID);
break;
};
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
void
-AMDILEGIOExpansion::expandLocalLoad(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandLocalLoad(MachineInstr *MI)
{
bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
if (!HWLocal || !isHardwareLocal(MI)) {
@@ -645,281 +551,249 @@
}
DebugLoc DL = MI->getDebugLoc();
unsigned mulOp = 0;
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions are generated before the current MI.
- expandLoadStartCode(MI);
+ expandLoadStartCode(MI, addyReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC_v4i32), AMDIL::R1011)
-
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADv4i32r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOADVEC_v2i32), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADv2i32r), AMDIL::Rxy1011)
+ .addReg(addyReg)
.addImm(lID);
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::COPY),
+ getPackedReg(dataReg, getPackedID(MI)))
+ .addReg(AMDIL::Rxy1011);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi32r),
+ getPackedReg(dataReg, getPackedID(MI)))
+ .addReg(addyReg)
.addImm(lID);
break;
case 1:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
- ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ ? AMDIL::UMULi32rr : AMDIL::UMUL24i32rr;
BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi32r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACTi32rrr), dataReg)
.addImm(mMFI->addi32Literal(8))
.addReg(AMDIL::Rx1008)
- .addReg(AMDIL::Rx1011);
+ .addReg(dataReg);
} else {
if (isSWSExtLoadInst(MI)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_i8), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi8r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_u8), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADu8r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
}
}
break;
case 2:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
mulOp = (mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem))
- ? AMDIL::UMUL_i32 : AMDIL::UMUL24_i32;
+ ? AMDIL::UMULi32rr : AMDIL::UMUL24i32rr;
BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1010)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(0xFFFFFFFC));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi32r), dataReg)
.addReg(AMDIL::Rx1010)
-
.addImm(lID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACT_i32), AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::IBIT_EXTRACTi32rrr), dataReg)
.addImm(mMFI->addi32Literal(16))
.addReg(AMDIL::Rx1008)
- .addReg(AMDIL::Rx1011);
+ .addReg(dataReg);
} else {
if (isSWSExtLoadInst(MI)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_i16), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADi16r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSLOAD_u16), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32LOADu16r), dataReg)
+ .addReg(addyReg)
.addImm(lID);
}
}
break;
}
- expandPackedData(MI);
- unsigned dataReg = expandExtendLoad(MI);
- if (!dataReg) {
- dataReg = getDataReg(MI);
- }
- BuildMI(*mBB, MI, MI->getDebugLoc(),
- mTII->get(getMoveInstFromID(
- MI->getDesc().OpInfo[0].RegClass)))
- .addOperand(MI->getOperand(0))
- .addReg(dataReg);
+ expandPackedData(MI, dataReg);
+ expandExtendLoad(MI, dataReg);
MI->getOperand(0).setReg(dataReg);
}
void
-AMDILEGIOExpansion::expandGlobalStore(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandGlobalStore(MachineInstr *MI)
{
bool usesArena = isArenaOp(MI);
uint32_t ID = getPointerID(MI);
mKM->setOutputInst();
DebugLoc DL = MI->getDebugLoc();
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = 0;
+ if (MI->getOperand(0).isReg()) {
+ dataReg = MI->getOperand(0).getReg();
+ }
// These instructions are expandted before the current MI.
- expandStoreSetupCode(MI);
- expandArenaSetup(MI);
+ expandStoreSetupCode(MI, addyReg, dataReg);
+ expandArenaSetup(MI, addyReg);
switch (getMemorySize(MI)) {
default:
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32),
+ getCompReg(addyReg, sub_x_comp))
+ .addReg(getCompReg(dataReg, sub_x_comp))
.addImm(ID);
if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Ry1010)
- .addReg(AMDIL::Ry1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32),
+ getCompReg(addyReg, sub_y_comp))
+ .addReg(getCompReg(dataReg, sub_y_comp))
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rz1010)
- .addReg(AMDIL::Rz1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32),
+ getCompReg(addyReg, sub_z_comp))
+ .addReg(getCompReg(dataReg, sub_z_comp))
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rw1010)
- .addReg(AMDIL::Rw1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32),
+ getCompReg(addyReg, sub_w_comp))
+ .addReg(getCompReg(dataReg, sub_w_comp))
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
-
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(2);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
-
- .addReg(AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1008)
+ .addReg(dataReg)
.addImm(2);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32STOREi32), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
-
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
-
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(3);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
-
- .addReg(AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1008)
+ .addReg(dataReg)
.addImm(3);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32STOREi32), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
-
.addImm(ID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
-
- .addReg(AMDIL::R1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(4);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
-
- .addReg(AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1008)
+ .addReg(dataReg)
.addImm(4);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32STOREi32), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
-
.addImm(ID);
}
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v4i32), AMDIL::MEM)
- .addReg(AMDIL::Rx1010)
-
- .addReg(AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREv4i32), AMDIL::MEM)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
}
break;
case 1:
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), dataReg)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i8), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi8), addyReg)
+ .addReg(dataReg)
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREi32), AMDIL::MEMx)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
}
break;
case 2:
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), dataReg)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i16), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi16), addyReg)
+ .addReg(dataReg)
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREi32), AMDIL::MEMx)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
}
break;
case 4:
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32), addyReg)
+ .addReg(dataReg)
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_i32), AMDIL::MEMx)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREi32), AMDIL::MEMx)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
}
break;
case 8:
if (usesArena) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32),
+ getCompReg(addyReg, sub_x_comp, sub_z_comp))
+ .addReg(getCompReg(dataReg, sub_x_comp, sub_z_comp))
.addImm(ID);
if (mSTM->device()->usesHardware(AMDILDeviceInfo::ArenaVectors)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Ry1010)
- .addReg(AMDIL::Ry1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENA32STOREi32),
+ getCompReg(addyReg, sub_y_comp, sub_w_comp))
+ .addReg(getCompReg(dataReg, sub_y_comp, sub_w_comp))
.addImm(ID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1007)
- .addReg(AMDIL::Rxy1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1007)
+ .addReg(addyReg)
.addImm(2);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACT_v4i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rxy1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VEXTRACTv4i32r), AMDIL::Rx1008)
+ .addReg(dataReg)
.addImm(2);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVARENASTORE_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(
+ AMDIL::UAVARENA32STOREi32), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
-
.addImm(ID);
}
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAWSTORE_v2i32), AMDIL::MEMxy)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rxy1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::UAVRAW32STOREv2i32), AMDIL::MEMxy)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(ID);
}
break;
};
}
void
-AMDILEGIOExpansion::expandRegionStore(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandRegionStore(MachineInstr *MI)
{
bool HWRegion = mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem);
if (!HWRegion || !isHardwareRegion(MI)) {
@@ -936,68 +810,58 @@
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
DebugLoc DL = MI->getDebugLoc();
- unsigned mulOp = HWRegion ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+ unsigned mulOp = HWRegion ? AMDIL::UMUL24i32rr : AMDIL::UMUL24i32rr;
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions are expandted before the current MI.
- expandStoreSetupCode(MI);
- expandArenaSetup(MI);
+ expandStoreSetupCode(MI, addyReg, dataReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
- .addReg(AMDIL::Rx1010)
- .addImm(mMFI->addi128Literal(1ULL << 32, 2ULL | (3ULL << 32)));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1010)
+ .addReg(addyReg)
+ .addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rx1010)
+ .addReg(getCompReg(dataReg, sub_x_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
- .addReg(AMDIL::Ry1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Ry1010)
+ .addReg(getCompReg(dataReg, sub_y_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rz1010)
- .addReg(AMDIL::Rz1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rz1010)
+ .addReg(getCompReg(dataReg, sub_z_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rw1010)
- .addReg(AMDIL::Rw1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rw1010)
+ .addReg(getCompReg(dataReg, sub_w_comp))
.addImm(gID);
break;
case 1:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1012)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
-
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1006)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(0xFFFFFF00))
.addImm(mMFI->addi32Literal(0x00FFFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Ry1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Ry1007)
.addReg(AMDIL::Ry1008)
.addReg(AMDIL::Rx1007)
-
.addImm(mMFI->addi32Literal(0xFF00FFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rz1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rz1012)
.addReg(AMDIL::Rz1008)
.addReg(AMDIL::Rx1007)
-
.addImm(mMFI->addi32Literal(0xFFFF00FF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1007);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_R_MSKOR_NORET))
@@ -1005,40 +869,33 @@
.addImm(mMFI->addi32Literal(0))
.addReg(AMDIL::Rx1012)
.addReg(AMDIL::Rx1011)
-
.addImm(gID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_i8), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi8r), addyReg)
+ .addReg(dataReg)
.addImm(gID);
}
break;
case 2:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteGDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0x0000FFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1012)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(0x0000FFFF))
.addImm(mMFI->addi32Literal(0xFFFF0000));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_R_MSKOR_NORET))
@@ -1046,40 +903,33 @@
.addImm(mMFI->addi32Literal(0))
.addReg(AMDIL::Rx1012)
.addReg(AMDIL::Rx1011)
-
.addImm(gID);
} else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE_i16), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi16r), addyReg)
+ .addReg(dataReg)
.addImm(gID);
}
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), addyReg)
+ .addReg(dataReg)
.addImm(gID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
- .addReg(AMDIL::Rx1010)
-
- .addImm(mMFI->addi64Literal(1ULL << 32));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv2i32rr), AMDIL::Rxy1010)
+ .addReg(addyReg)
+ .addImm(mMFI->addi64Literal(4ULL << 32));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Rx1010)
+ .addReg(getCompReg(dataReg, sub_x_comp))
.addImm(gID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDSSTORE), AMDIL::Ry1010)
- .addReg(AMDIL::Ry1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::GDS32STOREi32r), AMDIL::Ry1010)
+ .addReg(getCompReg(dataReg, sub_y_comp))
.addImm(gID);
break;
};
}
-
void
-AMDILEGIOExpansion::expandLocalStore(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandLocalStore(MachineInstr *MI)
{
bool HWLocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
if (!HWLocal || !isHardwareLocal(MI)) {
@@ -1095,161 +945,109 @@
lID = mSTM->device()->getResourceID(AMDILDevice::LDS_ID);
mMFI->addErrorMsg(amd::CompilerWarningMessage[RECOVERABLE_ERROR]);
}
- unsigned mulOp = HWLocal ? AMDIL::UMUL24_i32 : AMDIL::UMUL24_i32;
+ unsigned mulOp = HWLocal ? AMDIL::UMUL24i32rr : AMDIL::UMUL24i32rr;
+ uint32_t addyReg = MI->getOperand(1).getReg();
+ uint32_t dataReg = MI->getOperand(0).getReg();
// These instructions are expandted before the current MI.
- expandStoreSetupCode(MI);
+ expandStoreSetupCode(MI, addyReg, dataReg);
switch (getMemorySize(MI)) {
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC_v4i32), AMDIL::MEM)
-
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::R1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32STOREv4i32r), AMDIL::MEM)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(lID);
break;
case 8:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTOREVEC_v2i32), AMDIL::MEMxy)
- .addReg(AMDIL::Rx1010)
- .addReg(AMDIL::Rxy1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32STOREv2i32r), AMDIL::MEMxy)
+ .addReg(addyReg)
+ .addReg(dataReg)
.addImm(lID);
break;
case 4:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE), AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32STOREi32r), addyReg)
+ .addReg(dataReg)
.addImm(lID);
break;
case 1:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1012)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1012)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
-
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1008)
.addReg(AMDIL::Rx1008)
.addImm(mMFI->addi128Literal(0xFFFFFFFFULL << 32,
(0xFFFFFFFEULL | (0xFFFFFFFDULL << 32))));
BuildMI(*mBB, MI, DL, mTII->get(mulOp), AMDIL::Rx1006)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(8));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1007)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(0xFFFFFF00))
.addImm(mMFI->addi32Literal(0x00FFFFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1007)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1007)
.addReg(AMDIL::Ry1008)
.addReg(AMDIL::Rx1007)
-
.addImm(mMFI->addi32Literal(0xFF00FFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1012)
.addReg(AMDIL::Rz1008)
.addReg(AMDIL::Rx1007)
-
.addImm(mMFI->addi32Literal(0xFFFF00FF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1006);
-
- if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
- .addReg(AMDIL::Rx1010)
- .addImm(mMFI->addi32Literal(0))
- .addReg(AMDIL::Rx1012)
- .addReg(AMDIL::Rx1011)
-
- .addImm(lID);
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
- AMDIL::Rx1010)
- .addReg(AMDIL::Rx1012)
-
- .addImm(lID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_OR_NORET),
- AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
- .addImm(lID);
- }
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE_i8), AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
+ .addReg(addyReg)
+ .addImm(mMFI->addi32Literal(0))
+ .addReg(AMDIL::Rx1012)
.addReg(AMDIL::Rx1011)
-
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32STOREi8r), addyReg)
+ .addReg(dataReg)
.addImm(lID);
}
break;
case 2:
if (!mSTM->device()->usesHardware(AMDILDeviceInfo::ByteLDSOps)) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0x0000FFFF));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::BINARY_AND_i32), AMDIL::Rx1008)
- .addReg(AMDIL::Rx1010)
-
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1008)
+ .addReg(addyReg)
.addImm(mMFI->addi32Literal(3));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHR_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(1));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1012)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1012)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(0x0000FFFF))
.addImm(mMFI->addi32Literal(0xFFFF0000));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CMOVLOG_i32), AMDIL::Rx1008)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SELECTi32rrr), AMDIL::Rx1008)
.addReg(AMDIL::Rx1008)
-
.addImm(mMFI->addi32Literal(16))
.addImm(mMFI->addi32Literal(0));
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHL_i32), AMDIL::Rx1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHLi32i32rr), AMDIL::Rx1011)
.addReg(AMDIL::Rx1011)
.addReg(AMDIL::Rx1008);
-
- if (mSTM->calVersion() >= CAL_VERSION_SC_137) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
- .addReg(AMDIL::Rx1010)
- .addImm(mMFI->addi32Literal(0))
- .addReg(AMDIL::Rx1012)
- .addReg(AMDIL::Rx1011)
-
- .addImm(lID);
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_ADD_NORET),
- AMDIL::Rx1010)
- .addReg(AMDIL::Rx1012)
-
- .addImm(lID);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_OR_NORET),
- AMDIL::Rx1010)
- .addReg(AMDIL::Rx1011)
-
- .addImm(lID);
- }
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDSSTORE_i16), AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ATOM_L_MSKOR_NORET))
+ .addReg(addyReg)
+ .addImm(mMFI->addi32Literal(0))
+ .addReg(AMDIL::Rx1012)
.addReg(AMDIL::Rx1011)
-
+ .addImm(lID);
+ } else {
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LDS32STOREi16r), addyReg)
+ .addReg(dataReg)
.addImm(lID);
}
break;
}
}
-
-
-void
-AMDILEGIOExpansion::expandStoreSetupCode(MachineInstr *MI)
-{
- AMDIL789IOExpansion::expandStoreSetupCode(MI);
-}
void
-AMDILEGIOExpansion::expandArenaSetup(MachineInstr *MI)
+AMDILEGIOExpansionImpl::expandArenaSetup(MachineInstr *MI, uint32_t &addyReg)
{
if (!isArenaOp(MI)) {
return;
@@ -1264,16 +1062,18 @@
case AMDIL::GPRF64RegClassID:
case AMDIL::GPRV2I32RegClassID:
case AMDIL::GPRV2F32RegClassID:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v2i32), AMDIL::Rxy1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv2i32rr), AMDIL::Rxy1010)
+ .addReg(addyReg)
.addImm(mMFI->addi64Literal(4ULL << 32));
+ addyReg = AMDIL::Rxy1010;
break;
default:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADD_v4i32), AMDIL::R1010)
- .addReg(AMDIL::Rx1010)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::ADDv4i32rr), AMDIL::R1010)
+ .addReg(addyReg)
.addImm(mMFI->addi128Literal(4ULL << 32, 8ULL | (12ULL << 32)));
+ addyReg = AMDIL::R1010;
break;
case AMDIL::GPRI8RegClassID:
case AMDIL::GPRV2I8RegClassID:
@@ -1285,4 +1085,3 @@
break;
};
}
-
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.cpp Wed Sep 12 12:43:34 2012
@@ -28,52 +28,37 @@
: TargetELFWriterInfo(is64bit, endian)
{
}
-
-AMDILELFWriterInfo::~AMDILELFWriterInfo()
-{
+AMDILELFWriterInfo::~AMDILELFWriterInfo() {
}
-
-unsigned AMDILELFWriterInfo::getRelocationType(unsigned MachineRelTy) const
-{
+unsigned AMDILELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
assert(0 && "What do we do here? Lets assert an analyze");
return 0;
}
-
-bool AMDILELFWriterInfo::hasRelocationAddend() const
-{
+bool AMDILELFWriterInfo::hasRelocationAddend() const {
assert(0 && "What do we do here? Lets assert an analyze");
return false;
}
-
long int AMDILELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier) const
+ long int Modifier) const
{
assert(0 && "What do we do here? Lets assert an analyze");
return 0;
}
-
-unsigned AMDILELFWriterInfo::getRelocationTySize(unsigned RelTy) const
-{
+unsigned AMDILELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
assert(0 && "What do we do here? Lets assert an analyze");
return 0;
}
-
-bool AMDILELFWriterInfo::isPCRelativeRel(unsigned RelTy) const
-{
+bool AMDILELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
assert(0 && "What do we do here? Lets assert an analyze");
return false;
}
-
-unsigned AMDILELFWriterInfo::getAbsoluteLabelMachineRelTy() const
-{
+unsigned AMDILELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
assert(0 && "What do we do here? Lets assert an analyze");
return 0;
}
-
long int AMDILELFWriterInfo::computeRelocation(unsigned SymOffset,
- unsigned RelOffset,
- unsigned RelTy) const
-{
+ unsigned RelOffset,
+ unsigned RelTy) const {
assert(0 && "What do we do here? Lets assert an analyze");
return 0;
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILELFWriterInfo.h Wed Sep 12 12:43:34 2012
@@ -15,10 +15,8 @@
#define _AMDIL_ELF_WRITER_INFO_H_
#include "llvm/Target/TargetELFWriterInfo.h"
-namespace llvm
-{
-class AMDILELFWriterInfo : public TargetELFWriterInfo
-{
+namespace llvm {
+class AMDILELFWriterInfo : public TargetELFWriterInfo {
public:
AMDILELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
virtual ~AMDILELFWriterInfo();
@@ -34,7 +32,7 @@
/// getDefaultAddendForRelTy - Gets the default addend value for a
/// relocation entry based on the target ELF relocation type.
virtual long int getDefaultAddendForRelTy(unsigned RelTy,
- long int Modifier = 0) const;
+ long int Modifier = 0) const;
/// getRelTySize - Returns the size of relocatble field in bits
virtual unsigned getRelocationTySize(unsigned RelTy) const;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEnumeratedTypes.td Wed Sep 12 12:43:34 2012
@@ -589,3 +589,31 @@
def IL_OP_GDS_LOAD_USHORT : ILOpCode<418, "gds_load_ushort">;
def IL_OP_GDS_STORE_BYTE : ILOpCode<419, "gds_store_byte">;
def IL_OP_GDS_STORE_SHORT : ILOpCode<420, "gds_store_short">;
+def IL_OP_FENCE_LMG : ILOpCode<421, "fence_lds_memory_gds">;
+def IL_OP_FENCE_L : ILOpCode<422, "fence_lds">;
+def IL_OP_FENCE_M : ILOpCode<423, "fence_memory">;
+def IL_OP_FENCE_G : ILOpCode<424, "fence_gds">;
+def IL_OP_FENCE_LM : ILOpCode<425, "fence_lds_memory">;
+def IL_OP_FENCE_LG : ILOpCode<426, "fence_lds_gds">;
+def IL_OP_FENCE_MG : ILOpCode<427, "fence_memory_gds">;
+def IL_OP_FENCE_RO_LMG : ILOpCode<428, "fence_lds_memory_gds_mem_read_only">;
+def IL_OP_FENCE_RO_L : ILOpCode<429, "fence_lds_mem_read_only">;
+def IL_OP_FENCE_RO_M : ILOpCode<430, "fence_memory_mem_read_only">;
+def IL_OP_FENCE_RO_G : ILOpCode<431, "fence_gds_mem_read_only">;
+def IL_OP_FENCE_RO_LM : ILOpCode<432, "fence_lds_memory_mem_read_only">;
+def IL_OP_FENCE_RO_LG : ILOpCode<433, "fence_lds_gds_mem_read_only">;
+def IL_OP_FENCE_RO_MG : ILOpCode<434, "fence_memory_gds_mem_read_only">;
+def IL_OP_FENCE_WO_LMG : ILOpCode<435, "fence_lds_memory_gds_mem_write_only">;
+def IL_OP_FENCE_WO_L : ILOpCode<436, "fence_lds_mem_write_only">;
+def IL_OP_FENCE_WO_M : ILOpCode<437, "fence_memory_mem_write_only">;
+def IL_OP_FENCE_WO_G : ILOpCode<438, "fence_gds_mem_write_only">;
+def IL_OP_FENCE_WO_LM : ILOpCode<439, "fence_lds_memory_mem_write_only">;
+def IL_OP_FENCE_WO_LG : ILOpCode<440, "fence_lds_gds_mem_write_only">;
+def IL_OP_FENCE_WO_MG : ILOpCode<441, "fence_memory_gds_mem_write_only">;
+def IL_OP_FENCE_S : ILOpCode<442, "fence_lds_memory_gds_gws">;
+def IL_OP_FENCE_LS : ILOpCode<443, "fence_lds_gws">;
+def IL_OP_FENCE_MS : ILOpCode<444, "fence_memory_gws">;
+def IL_OP_FENCE_GS : ILOpCode<445, "fence_gds_gws">;
+def IL_OP_FENCE_LMS : ILOpCode<446, "fence_lds_memory_gws">;
+def IL_OP_FENCE_LGS : ILOpCode<447, "fence_lds_gds_gws">;
+def IL_OP_FENCE_MGS : ILOpCode<448, "fence_memory_gds_gws">;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.cpp Wed Sep 12 12:43:34 2012
@@ -18,8 +18,7 @@
using namespace llvm;
AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
- : AMDILDevice(ST)
-{
+ : AMDILDevice(ST) {
setCaps();
std::string name = ST->getDeviceName();
if (name == "cedar") {
@@ -32,45 +31,33 @@
mDeviceFlag = OCL_DEVICE_JUNIPER;
}
}
-
-AMDILEvergreenDevice::~AMDILEvergreenDevice()
-{
+AMDILEvergreenDevice::~AMDILEvergreenDevice() {
}
-
-size_t AMDILEvergreenDevice::getMaxLDSSize() const
-{
+size_t AMDILEvergreenDevice::getMaxLDSSize() const {
if (usesHardware(AMDILDeviceInfo::LocalMem)) {
return MAX_LDS_SIZE_800;
} else {
return 0;
}
}
-size_t AMDILEvergreenDevice::getMaxGDSSize() const
-{
+size_t AMDILEvergreenDevice::getMaxGDSSize() const {
if (usesHardware(AMDILDeviceInfo::RegionMem)) {
return MAX_GDS_SIZE_800;
} else {
return 0;
}
}
-uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const
-{
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const {
return 12;
}
-
-uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const
-{
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
switch(id) {
default:
assert(0 && "ID type passed in is unknown!");
break;
case CONSTANT_ID:
case RAW_UAV_ID:
- if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
- return GLOBAL_RETURN_RAW_UAV_ID;
- } else {
- return DEFAULT_RAW_UAV_ID;
- }
+ return GLOBAL_RETURN_RAW_UAV_ID;
case GLOBAL_ID:
case ARENA_UAV_ID:
return DEFAULT_ARENA_UAV_ID;
@@ -95,29 +82,21 @@
};
return 0;
}
-
-size_t AMDILEvergreenDevice::getWavefrontSize() const
-{
+size_t AMDILEvergreenDevice::getWavefrontSize() const {
return AMDILDevice::WavefrontSize;
}
-
-uint32_t AMDILEvergreenDevice::getGeneration() const
-{
+uint32_t AMDILEvergreenDevice::getGeneration() const {
return AMDILDeviceInfo::HD5XXX;
}
-
-void AMDILEvergreenDevice::setCaps()
-{
+void AMDILEvergreenDevice::setCaps() {
mHWBits.set(AMDILDeviceInfo::ByteGDSOps);
mSWBits.reset(AMDILDeviceInfo::ByteGDSOps);
mSWBits.set(AMDILDeviceInfo::ArenaSegment);
mHWBits.set(AMDILDeviceInfo::ArenaUAV);
mHWBits.set(AMDILDeviceInfo::Semaphore);
- if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
- mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
- mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
- }
+ mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
mHWBits.set(AMDILDeviceInfo::ByteStores);
@@ -136,101 +115,67 @@
} else {
mHWBits.set(AMDILDeviceInfo::Images);
}
- if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
- mHWBits.set(AMDILDeviceInfo::CachedMem);
- }
+ mHWBits.set(AMDILDeviceInfo::CachedMem);
if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
mHWBits.set(AMDILDeviceInfo::MultiUAV);
}
- if (mSTM->calVersion() > CAL_VERSION_SC_136) {
- mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
- mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
- mHWBits.set(AMDILDeviceInfo::ArenaVectors);
- } else {
- mSWBits.set(AMDILDeviceInfo::ArenaVectors);
- }
- if (mSTM->calVersion() > CAL_VERSION_SC_137) {
- mHWBits.set(AMDILDeviceInfo::LongOps);
- mSWBits.reset(AMDILDeviceInfo::LongOps);
- }
+ mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+ mHWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.reset(AMDILDeviceInfo::LongOps);
mHWBits.set(AMDILDeviceInfo::TmrReg);
}
-
FunctionPass*
-AMDILEvergreenDevice::getIOExpansion(
- TargetMachine& TM, CodeGenOpt::Level OptLevel) const
+AMDILEvergreenDevice::getIOExpansion() const
{
- return new AMDILEGIOExpansion(TM, OptLevel);
+ return new AMDILEGIOExpansion();
}
-
AsmPrinter*
AMDILEvergreenDevice::getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const
{
return new AMDILEGAsmPrinter(ASM_PRINTER_ARGUMENTS);
}
-
FunctionPass*
AMDILEvergreenDevice::getPointerManager(
TargetMachine& TM, CodeGenOpt::Level OptLevel) const
{
- return new AMDILEGPointerManager(TM, OptLevel);
+ return new AMDILEGPointerManager();
}
-
AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
- : AMDILEvergreenDevice(ST)
-{
+ : AMDILEvergreenDevice(ST) {
setCaps();
}
-
-AMDILCypressDevice::~AMDILCypressDevice()
-{
+AMDILCypressDevice::~AMDILCypressDevice() {
}
-
-void AMDILCypressDevice::setCaps()
-{
+void AMDILCypressDevice::setCaps() {
if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
mHWBits.set(AMDILDeviceInfo::DoubleOps);
mHWBits.set(AMDILDeviceInfo::FMA);
}
}
-
-
AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
- : AMDILEvergreenDevice(ST)
-{
+ : AMDILEvergreenDevice(ST) {
setCaps();
}
-
-AMDILCedarDevice::~AMDILCedarDevice()
-{
+AMDILCedarDevice::~AMDILCedarDevice() {
}
-
-void AMDILCedarDevice::setCaps()
-{
+void AMDILCedarDevice::setCaps() {
mSWBits.set(AMDILDeviceInfo::FMA);
}
-
-size_t AMDILCedarDevice::getWavefrontSize() const
-{
+size_t AMDILCedarDevice::getWavefrontSize() const {
return AMDILDevice::QuarterWavefrontSize;
}
-
AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
- : AMDILEvergreenDevice(ST)
-{
+ : AMDILEvergreenDevice(ST) {
setCaps();
}
-
AMDILRedwoodDevice::~AMDILRedwoodDevice()
{
}
-
-void AMDILRedwoodDevice::setCaps()
-{
+void AMDILRedwoodDevice::setCaps() {
mSWBits.set(AMDILDeviceInfo::FMA);
}
-
-size_t AMDILRedwoodDevice::getWavefrontSize() const
-{
+size_t AMDILRedwoodDevice::getWavefrontSize() const {
return AMDILDevice::HalfWavefrontSize;
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILEvergreenDevice.h Wed Sep 12 12:43:34 2012
@@ -15,20 +15,17 @@
#define _AMDILEVERGREENDEVICE_H_
#include "AMDILDevice.h"
#include "AMDILSubtarget.h"
-namespace llvm
-{
+namespace llvm {
class AMDILSubtarget;
//===----------------------------------------------------------------------===//
// Evergreen generation of devices and their respective sub classes
//===----------------------------------------------------------------------===//
-
// The AMDILEvergreenDevice is the base device class for all of the Evergreen
// series of cards. This class contains information required to differentiate
// the Evergreen device from the generic AMDILDevice. This device represents
// that capabilities of the 'Juniper' cards, also known as the HD57XX.
-class AMDILEvergreenDevice : public AMDILDevice
-{
+class AMDILEvergreenDevice : public AMDILDevice {
public:
AMDILEvergreenDevice(AMDILSubtarget *ST);
virtual ~AMDILEvergreenDevice();
@@ -38,8 +35,7 @@
virtual uint32_t getGeneration() const;
virtual uint32_t getMaxNumUAVs() const;
virtual uint32_t getResourceID(uint32_t) const;
- virtual FunctionPass*
- getIOExpansion(TargetMachine&, CodeGenOpt::Level) const;
+ virtual FunctionPass* getIOExpansion() const;
virtual AsmPrinter*
getAsmPrinter(AMDIL_ASM_PRINTER_ARGUMENTS) const;
virtual FunctionPass*
@@ -52,8 +48,7 @@
// support for double precision operations. This device is used to represent
// both the Cypress and Hemlock cards, which are commercially known as HD58XX
// and HD59XX cards.
-class AMDILCypressDevice : public AMDILEvergreenDevice
-{
+class AMDILCypressDevice : public AMDILEvergreenDevice {
public:
AMDILCypressDevice(AMDILSubtarget *ST);
virtual ~AMDILCypressDevice();
@@ -61,13 +56,11 @@
virtual void setCaps();
}; // AMDILCypressDevice
-
// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
// devices. This class differs from the base AMDILEvergreenDevice in that the
// device is a ~quarter of the 'Juniper'. These are commercially known as the
// HD54XX and HD53XX series of cards.
-class AMDILCedarDevice : public AMDILEvergreenDevice
-{
+class AMDILCedarDevice : public AMDILEvergreenDevice {
public:
AMDILCedarDevice(AMDILSubtarget *ST);
virtual ~AMDILCedarDevice();
@@ -80,8 +73,7 @@
// devices. This class differs from the base class, in that these devices are
// considered about half of a 'Juniper' device. These are commercially known as
// the HD55XX and HD56XX series of cards.
-class AMDILRedwoodDevice : public AMDILEvergreenDevice
-{
+class AMDILRedwoodDevice : public AMDILEvergreenDevice {
public:
AMDILRedwoodDevice(AMDILSubtarget *ST);
virtual ~AMDILRedwoodDevice();
@@ -89,6 +81,5 @@
private:
virtual void setCaps();
}; // AMDILRedwoodDevice
-
} // namespace llvm
#endif // _AMDILEVERGREENDEVICE_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFixupKinds.h Wed Sep 12 12:43:34 2012
@@ -17,10 +17,8 @@
#include "llvm/MC/MCFixup.h"
-namespace llvm
-{
-namespace AMDIL
-{
+namespace llvm {
+namespace AMDIL {
enum Fixups {
reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
reloc_riprel_4byte, // 32-bit rip-relative
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFormats.td Wed Sep 12 12:43:34 2012
@@ -26,232 +26,96 @@
ILOpCode operation = op;
let Pattern = pattern;
let AsmString = !strconcat(asmstr, "\n");
- bit hasIEEEFlag = 0;
- bit hasZeroOpFlag = 0;
+ bits<2> extLoadType = 0;
+ bit load = 0;
+ bit store = 0;
+ bit truncate = 0;
+ bit atomic = 0;
+ bit addr64 = 0;
+ bit global = 0;
+ bit private = 0;
+ bit constant = 0;
+ bit cpool = 0;
+ bit region = 0;
+ bit local = 0;
+ bit gds = 0;
+ bit lds = 0;
+ bit cbmem = 0;
+ bit scratch = 0;
+ bit rawuav = 0;
+ bit arenauav = 0;
+ bit image = 0;
+ bits<2> info = 0;
+ bit txld = 0;
+ bit sema = 0;
+ bit append = 0;
+ bit swsextload = 0;
+ bit loadconst = 0;
+ bit ieee = 0;
+ bit zeroop = 0;
+ bit flat = 0;
+ bits<6> swizzle = 0;
+ bit gws = 0;
+ bit packed = 0;
+ bit sub32bit = 0;
+ bits<2> packType = 0;
+ bit vector = 0;
+ // This has to be kept in sync with (1ULL << AMDIL.h::AMDID) enumeration.
+ let TSFlags{2-1} = extLoadType;
+ let TSFlags{3} = load;
+ let TSFlags{4} = store;
+ let TSFlags{5} = truncate;
+ let TSFlags{6} = atomic;
+ let TSFlags{7} = addr64;
+ let TSFlags{8} = global;
+ let TSFlags{9} = private;
+ let TSFlags{10} = constant;
+ let TSFlags{11} = cpool;
+ let TSFlags{12} = region;
+ let TSFlags{13} = local;
+ let TSFlags{14} = gds;
+ let TSFlags{15} = lds;
+ let TSFlags{16} = cbmem;
+ let TSFlags{17} = scratch;
+ let TSFlags{18} = rawuav;
+ let TSFlags{19} = arenauav;
+ let TSFlags{20} = image;
+ let TSFlags{22-21} = info;
+ let TSFlags{23} = txld;
+ let TSFlags{24} = sema;
+ let TSFlags{25} = append;
+ let TSFlags{26} = swsextload;
+ let TSFlags{27} = loadconst;
+ let TSFlags{28} = ieee;
+ let TSFlags{29} = zeroop;
+ let TSFlags{30} = flat;
+ let TSFlags{36-31} = swizzle;
+ let TSFlags{37} = gws;
+ let TSFlags{38} = packed;
+ let TSFlags{39} = sub32bit;
+ let TSFlags{41-40} = packType;
+ let TSFlags{42} = vector;
}
//===--------------------------------------------------------------------===//
-// The base class for vector insert instructions. It is a single dest, quad
-// source instruction where the last two source operands must be 32bit
-// immediate values that are encoding the swizzle of the source register
-// The src2 and src3 instructions must also be inversion of each other such
-// that if src2 is 0x1000300(x0z0), src3 must be 0x20004(0y0w). The values
-// are encoded as 32bit integer with each 8 char representing a swizzle value.
-// The encoding is as follows for 32bit register types:
-// 0x00 -> '_'
-// 0x01 -> 'x'
-// 0x02 -> 'y'
-// 0x03 -> 'z'
-// 0x04 -> 'w'
-// 0x05 -> 'x'
-// 0x06 -> 'y'
-// 0x07 -> 'z'
-// 0x08 -> 'w'
-// 0x09 -> '0'
-// The encoding is as follows for 64bit register types:
-// 0x00 -> "__"
-// 0x01 -> "xy"
-// 0x02 -> "zw"
-// 0x03 -> "xy"
-// 0x04 -> "zw"
-// 0x05 -> "00"
-//===--------------------------------------------------------------------===//
-class InsertVectorClass<ILOpCode op, RegisterClass DReg, RegisterClass SReg,
- SDNode OpNode, string asmstr> :
- ILFormat<op, (outs DReg:$dst),
- (ins DReg:$src0, SReg:$src1, i32imm:$src2, i32imm:$src3),
- !strconcat(asmstr, " $dst, $src0, $src1"),
- [(set DReg:$dst, (OpNode DReg:$src0, SReg:$src1,
- timm:$src2, timm:$src3))]>;
-
-//===--------------------------------------------------------------------===//
-// Class that has one input parameters and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0" and
-// handles the unary math operators.
-// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
-// if the addressing is register relative for input and output register 0.
-//===--------------------------------------------------------------------===//
-class OneInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : ILFormat<op, outs, ins, asmstr, pattern>
-{
- ILDst dst_reg;
- ILDstMod dst_mod;
- ILRelAddr dst_rel;
- ILSrc dst_reg_rel;
- ILSrcMod dst_reg_rel_mod;
- ILSrc src0_reg;
- ILSrcMod src0_mod;
- ILRelAddr src0_rel;
- ILSrc src0_reg_rel;
- ILSrcMod src0_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// A simplified version of OneInOneOut class where the pattern is standard
-// and does not need special cases. This requires that the pattern has
-// a SDNode and takes a source and destination register that is of type
-// RegisterClass. This is the standard unary op class.
-//===--------------------------------------------------------------------===//
-class UnaryOp<ILOpCode op, SDNode OpNode,
- RegisterClass dRegs, RegisterClass sRegs>
- : OneInOneOut<op, (outs dRegs:$dst), (ins sRegs:$src),
- !strconcat(op.Text, " $dst, $src"),
- [(set dRegs:$dst, (OpNode sRegs:$src))]>;
-
-//===--------------------------------------------------------------------===//
-// This class is similiar to the UnaryOp class, however, there is no
-// result value to assign.
-//===--------------------------------------------------------------------===//
-class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : ILFormat<op, outs, ins, asmstr, pattern>
-{
- ILSrc src0_reg;
- ILSrcMod src0_mod;
- ILRelAddr src0_rel;
- ILSrc src0_reg_rel;
- ILSrcMod src0_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// Set of classes that have two input parameters and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
-// handles the binary math operators and comparison operations.
-// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
-// if the addressing is register relative for input register 1.
-//===--------------------------------------------------------------------===//
-class TwoInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : OneInOneOut<op, outs, ins, asmstr, pattern>
-{
- ILSrc src1_reg;
- ILSrcMod src1_mod;
- ILRelAddr src1_rel;
- ILSrc src1_reg_rel;
- ILSrcMod src1_reg_rel_mod;
-}
-//===--------------------------------------------------------------------===//
-// A simplification of the TwoInOneOut pattern for Binary Operations.
-// This class is a helper class that assumes the simple pattern of
-// $dst = op $src0 $src1.
-// Other type of matching patterns need to use the TwoInOneOut class.
-//===--------------------------------------------------------------------===//
-class BinaryOp<ILOpCode op, SDNode OpNode, RegisterClass dReg,
- RegisterClass sReg0, RegisterClass sReg1>
- : TwoInOneOut<op, (outs dReg:$dst), (ins sReg0:$src0, sReg1:$src1),
- !strconcat(op.Text, " $dst, $src0, $src1"),
- [(set dReg:$dst, (OpNode sReg0:$src0, sReg1:$src1))]>;
-
-//===--------------------------------------------------------------------===//
-// The base class for vector extract instructions. The vector extract
-// instructions take as an input value a source register and a 32bit integer
-// with the same encoding as specified in InsertVectorClass and produces
-// a result with only the swizzled component in the destination register.
-//===--------------------------------------------------------------------===//
-class ExtractVectorClass<RegisterClass DReg, RegisterClass SReg, SDNode OpNode>
-: TwoInOneOut<IL_OP_MOV, (outs DReg:$dst), (ins SReg:$src0, i32imm:$src1),
- "mov $dst, $src0",
- [(set DReg:$dst, (OpNode SReg:$src0, timm:$src1))]>;
-
-//===--------------------------------------------------------------------===//
-// The base class for vector concatenation. This class creates either a vec2
-// or a vec4 of 32bit data types or a vec2 of 64bit data types. This is done
-// by swizzling either the 'x' or 'xy' components of the source operands
-// into the destination register.
-//===--------------------------------------------------------------------===//
-class VectorConcatClass<RegisterClass Dst, RegisterClass Src, SDNode OpNode>
- : TwoInOneOut<IL_OP_I_ADD, (outs Dst:$dst), (ins Src:$src0, Src:$src1),
- "iadd $dst, $src0, $src1",
- [(set Dst:$dst, (OpNode Src:$src0, Src:$src1))]>;
-
-//===--------------------------------------------------------------------===//
-// Similiar to the UnaryOpNoRet class, but takes as arguments two input
-// operands. Used mainly for barrier instructions on PC platform.
-//===--------------------------------------------------------------------===//
-class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
-{
- ILSrc src1_reg;
- ILSrcMod src1_mod;
- ILRelAddr src1_rel;
- ILSrc src1_reg_rel;
- ILSrcMod src1_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// Set of classes that have three input parameters and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
-// handles the mad and conditional mov instruction.
-// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
-// if the addressing is register relative.
-// This class is the parent class of TernaryOp
-//===--------------------------------------------------------------------===//
-class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : TwoInOneOut<op, outs, ins, asmstr, pattern> {
- ILSrc src2_reg;
- ILSrcMod src2_mod;
- ILRelAddr src2_rel;
- ILSrc src2_reg_rel;
- ILSrcMod src2_reg_rel_mod;
- }
-
-//===--------------------------------------------------------------------===//
-// The g version of the Three Input pattern uses a standard pattern but
-// but allows specification of the register to further generalize the class
-// This class is mainly used in the generic multiclasses in AMDILMultiClass.td
-//===--------------------------------------------------------------------===//
-class TernaryOp<ILOpCode op, SDNode OpNode,
- RegisterClass dReg,
- RegisterClass sReg0,
- RegisterClass sReg1,
- RegisterClass sReg2>
- : ThreeInOneOut<op, (outs dReg:$dst),
- (ins sReg0:$src0, sReg1:$src1, sReg2:$src2),
- !strconcat(op.Text, " $dst, $src0, $src1, $src2"),
- [(set dReg:$dst,
- (OpNode sReg0:$src0, sReg1:$src1, sReg2:$src2))]>;
-
-//===--------------------------------------------------------------------===//
-// Set of classes that have three input parameters and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
-// handles the mad and conditional mov instruction.
-// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
-// if the addressing is register relative.
-// This class is the parent class of TernaryOp
-//===--------------------------------------------------------------------===//
-class FourInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : ThreeInOneOut<op, outs, ins, asmstr, pattern> {
- ILSrc src3_reg;
- ILSrcMod src3_mod;
- ILRelAddr src3_rel;
- ILSrc src3_reg_rel;
- ILSrcMod src3_reg_rel_mod;
- }
-
-
-//===--------------------------------------------------------------------===//
-// The macro class that is an extension of OneInOneOut but is tailored for
+// The macro class that is an extension of ILFormat but is tailored for
// macros only where all the register types are the same
//===--------------------------------------------------------------------===//
class UnaryMacro<RegisterClass Dst, RegisterClass Src0, SDNode OpNode>
-: OneInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+: ILFormat<IL_OP_MACRO, (outs Dst:$dst),
(ins Src0:$src0),
"($dst),($src0)",
[(set Dst:$dst, (OpNode Src0:$src0))]>;
//===--------------------------------------------------------------------===//
-// The macro class is an extension of TwoInOneOut but is tailored for
+// The macro class is an extension of ILFormat but is tailored for
// macros only where all the register types are the same
//===--------------------------------------------------------------------===//
class BinaryMacro<RegisterClass Dst,
RegisterClass Src0,
RegisterClass Src1,
SDNode OpNode>
- : TwoInOneOut<IL_OP_MACRO, (outs Dst:$dst),
+ : ILFormat<IL_OP_MACRO, (outs Dst:$dst),
(ins Src0: $src0, Src1:$src1),
"($dst),($src0, $src1)",
[(set Dst:$dst, (OpNode Src0:$src0, Src1:$src1))]>;
@@ -470,6 +334,24 @@
(ins MEM6464:$ptr, GPRI64:$src, GPRI64:$src1, i32imm:$id),
!strconcat(op.Text, !strconcat(idType," $ptr, $src1, $src")),
[(intr ADDR64:$ptr, GPRI64:$src, GPRI64:$src1, timm:$id)]>;
+
+// atomic global load
+class AtomGLoadClass<ILOpCode op, Operand memType, RegisterClass valRegType,
+ ComplexPattern addrType>
+ : ILFormat<op, (outs valRegType:$dst),
+ (ins memType:$ptr, GPRI32:$order, i32imm:$id),
+ !strconcat(op.Text, "_id($id)_uncached $dst, $ptr"),
+ [(set valRegType:$dst,
+ (atom_g_load addrType:$ptr, GPRI32:$order, timm:$id))]>;
+
+// atomic global store
+class AtomGStoreClass<ILOpCode op, Operand memType, RegisterClass valRegType,
+ ComplexPattern addrType>
+ : ILFormat<op, (outs),
+ (ins memType:$ptr, valRegType:$src, GPRI32:$order, i32imm:$id),
+ !strconcat(op.Text, "_id($id)_uncached $ptr, $src"),
+ [(atom_g_store addrType:$ptr, valRegType:$src,
+ GPRI32:$order, timm:$id)]>;
//===--------------------------------------------------------------------===//
// Intrinsic classes
// Generic versions of the above classes but for Target specific intrinsics
@@ -477,40 +359,40 @@
//===--------------------------------------------------------------------===//
let TargetPrefix = "AMDIL", isTarget = 1 in {
class VoidIntLong :
- Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [], []>;
class VoidIntInt :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [], []>;
class VoidIntBool :
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [], []>;
class UnaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>;
class UnaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], []>;
class ConvertIntFTOI :
- Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], []>;
class ConvertIntITOF :
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], []>;
class UnaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty], [IntrNoMem]>;
+ Intrinsic<[], [llvm_anyint_ty], []>;
class UnaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty], [IntrNoMem]>;
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
class BinaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
class BinaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>;
class BinaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
class BinaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
class TernaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
class TernaryIntFloat :
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ LLVMMatchType<0>, LLVMMatchType<0>], []>;
class QuaternaryIntInt :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], []>;
class UnaryAtomicInt :
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
class BinaryAtomicInt :
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.cpp Wed Sep 12 12:43:34 2012
@@ -20,20 +20,16 @@
: TargetFrameLowering(D, StackAl, LAO, TransAl)
{
}
-
AMDILFrameLowering::~AMDILFrameLowering()
{
}
-
/// getFrameIndexOffset - Returns the displacement from the frame register to
/// the stack frame of the specified index.
int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const
-{
+ int FI) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getObjectOffset(FI);
}
-
const TargetFrameLowering::SpillSlot *
AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
{
@@ -45,7 +41,8 @@
{
}
void
-AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+AMDILFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const
{
}
bool
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILFrameLowering.h Wed Sep 12 12:43:34 2012
@@ -22,10 +22,8 @@
/// each function, and the offset to the locals area.
/// See TargetFrameInfo for more comments.
-namespace llvm
-{
-class AMDILFrameLowering : public TargetFrameLowering
-{
+namespace llvm {
+class AMDILFrameLowering : public TargetFrameLowering {
public:
AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
TransAl = 1);
@@ -37,6 +35,6 @@
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
virtual bool hasFP(const MachineFunction &MF) const;
-}; // class AMDILFrameLowering
+}; // class AMDILFrameLowering
} // namespace llvm
#endif // _AMDILFRAME_LOWERING_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.cpp Wed Sep 12 12:43:34 2012
@@ -32,38 +32,29 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-char AMDILIOExpansion::ID = 0;
-namespace llvm
-{
+namespace llvm {
FunctionPass*
-createAMDILIOExpansion(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+createAMDILIOExpansion(TargetMachine& TM, CodeGenOpt::Level OptLevel)
{
- return TM.getSubtarget<AMDILSubtarget>()
- .device()->getIOExpansion(TM, OptLevel);
-}
+ return TM.getSubtarget<AMDILSubtarget>().device()->getIOExpansion();
}
-
-AMDILIOExpansion::AMDILIOExpansion(TargetMachine &tm,
- CodeGenOpt::Level OptLevel) :
- MachineFunctionPass(ID), TM(tm)
-{
- mSTM = &tm.getSubtarget<AMDILSubtarget>();
- mDebug = DEBUGME;
- mTII = tm.getInstrInfo();
- mKM = NULL;
}
-AMDILIOExpansion::~AMDILIOExpansion()
+AMDILIOExpansionImpl::AMDILIOExpansionImpl(MachineFunction& mf)
+ : mDebug(DEBUGME), MF(mf), mBB(NULL),TM(MF.getTarget())
{
+ mSTM = &TM.getSubtarget<AMDILSubtarget>();
+ mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager());
+ mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
+ mTRI = TM.getRegisterInfo();
+ mTII = TM.getInstrInfo();
}
-
bool
-AMDILIOExpansion::runOnMachineFunction(MachineFunction &MF)
+AMDILIOExpansionImpl::run()
{
- mKM = const_cast<AMDILKernelManager*>(mSTM->getKernelManager());
- mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
MFI != MFE; ++MFI) {
MachineBasicBlock *MBB = MFI;
@@ -85,37 +76,33 @@
}
return false;
}
-const char *AMDILIOExpansion::getPassName() const
-{
- return "AMDIL Generic IO Expansion Pass";
-}
bool
-AMDILIOExpansion::isIOInstruction(MachineInstr *MI)
+AMDILIOExpansionImpl::isIOInstruction(MachineInstr *MI)
{
if (!MI) {
return false;
}
- if (isLoadInst(TM, MI) || isStoreInst(TM, MI)) {
+ if (isPtrLoadInst(MI) || isPtrStoreInst(MI)) {
return true;
}
return false;
}
void
-AMDILIOExpansion::expandIOInstruction(MachineInstr *MI)
+AMDILIOExpansionImpl::expandIOInstruction(MachineInstr *MI)
{
assert(isIOInstruction(MI) && "Must be an IO instruction to "
"be passed to this function!");
- if (isLoadInst(TM, MI)) {
- if (isGlobalInst(TM, MI)) {
+ if (isPtrLoadInst(MI)) {
+ if (isGlobalInst(MI)) {
expandGlobalLoad(MI);
- } else if (isRegionInst(TM, MI)) {
+ } else if (isRegionInst(MI)) {
expandRegionLoad(MI);
- } else if (isPrivateInst(TM, MI)) {
+ } else if (isPrivateInst(MI)) {
expandPrivateLoad(MI);
- } else if (isLocalInst(TM, MI)) {
+ } else if (isLocalInst(MI)) {
expandLocalLoad(MI);
- } else if (isConstantInst(TM, MI)) {
- if (isConstantPoolInst(TM, MI)) {
+ } else if (isConstantInst(MI)) {
+ if (isConstantPoolInst(MI)) {
expandConstantPoolLoad(MI);
} else {
expandConstantLoad(MI);
@@ -123,14 +110,14 @@
} else {
assert(!"Found an unsupported load instruction!");
}
- } else if (isStoreInst(TM, MI)) {
- if (isGlobalInst(TM, MI)) {
+ } else if (isPtrStoreInst(MI)) {
+ if (isGlobalInst(MI)) {
expandGlobalStore(MI);
- } else if (isRegionInst(TM, MI)) {
+ } else if (isRegionInst(MI)) {
expandRegionStore(MI);
- } else if (isPrivateInst(TM, MI)) {
+ } else if (isPrivateInst(MI)) {
expandPrivateStore(MI);
- } else if (isLocalInst(TM, MI)) {
+ } else if (isLocalInst(MI)) {
expandLocalStore(MI);
} else {
assert(!"Found an unsupported load instruction!");
@@ -139,11 +126,10 @@
assert(!"Found an unsupported IO instruction!");
}
}
-
bool
-AMDILIOExpansion::isAddrCalcInstr(MachineInstr *MI)
+AMDILIOExpansionImpl::isAddrCalcInstr(MachineInstr *MI)
{
- if (isPrivateInst(TM, MI) && isLoadInst(TM, MI)) {
+ if (isPrivateInst(MI) && isPtrLoadInst(MI)) {
// This section of code is a workaround for the problem of
// globally scoped constant address variables. The problems
// comes that although they are declared in the constant
@@ -165,138 +151,36 @@
} else {
return false;
}
- } else if (isConstantPoolInst(TM, MI) && isLoadInst(TM, MI)) {
+ } else if (isConstantPoolInst(MI) && isPtrLoadInst(MI)) {
return MI->getOperand(1).isReg();
- } else if (isPrivateInst(TM, MI) && isStoreInst(TM, MI)) {
+ } else if (isPrivateInst(MI) && isPtrStoreInst(MI)) {
return mSTM->device()->usesSoftware(AMDILDeviceInfo::PrivateMem);
- } else if (isLocalInst(TM, MI) && (isStoreInst(TM, MI) || isLoadInst(TM, MI))) {
+ } else if (isLocalInst(MI) && (isPtrStoreInst(MI) || isPtrLoadInst(MI))) {
return mSTM->device()->usesSoftware(AMDILDeviceInfo::LocalMem);
}
return false;
}
-
bool
-AMDILIOExpansion::isExtendLoad(MachineInstr *MI)
+AMDILIOExpansionImpl::isExtendLoad(MachineInstr *MI)
{
- return isSExtLoadInst(TM, MI) || isZExtLoadInst(TM, MI) || isAExtLoadInst(TM, MI);
+ return isSExtLoadInst(MI) || isZExtLoadInst(MI) || isAExtLoadInst(MI);
}
-
bool
-AMDILIOExpansion::isHardwareRegion(MachineInstr *MI)
+AMDILIOExpansionImpl::isHardwareRegion(MachineInstr *MI)
{
- return (isRegionInst(TM, MI) && (isLoadInst(TM, MI) || isStoreInst(TM, MI)) &&
+ return (isRegionInst(MI) && (isPtrLoadInst(MI) || isPtrStoreInst(MI)) &&
mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem));
}
bool
-AMDILIOExpansion::isHardwareLocal(MachineInstr *MI)
+AMDILIOExpansionImpl::isHardwareLocal(MachineInstr *MI)
{
- return (isLocalInst(TM, MI) && (isLoadInst(TM, MI) || isStoreInst(TM, MI)) &&
+ return (isLocalInst(MI) && (isPtrLoadInst(MI) || isPtrStoreInst(MI)) &&
mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem));
}
bool
-AMDILIOExpansion::isPackedData(MachineInstr *MI)
-{
- switch(MI->getOpcode()) {
- default:
- if (isTruncStoreInst(TM, MI)) {
- switch (MI->getDesc().OpInfo[0].RegClass) {
- default:
- break;
- case AMDIL::GPRV2I64RegClassID:
- case AMDIL::GPRV2I32RegClassID:
- switch (getMemorySize(MI)) {
- case 2:
- case 4:
- return true;
- default:
- break;
- }
- break;
- case AMDIL::GPRV4I32RegClassID:
- switch (getMemorySize(MI)) {
- case 4:
- case 8:
- return true;
- default:
- break;
- }
- break;
- }
- }
- break;
- ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD);
- ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD);
- ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::LOCALLOAD);
- ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::REGIONLOAD);
- ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD);
- ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD);
- ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE)
- ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE);
- ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE);
- ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE);
- ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE);
- ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE);
- ExpandCaseToPackedTypes(AMDIL::LOCALSTORE);
- ExpandCaseToPackedTypes(AMDIL::REGIONSTORE);
- ExpandCaseToPackedTypes(AMDIL::CPOOLLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CPOOLSEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CPOOLZEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CPOOLAEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::GLOBALLOAD64);
- ExpandCaseToPackedTypes(AMDIL::GLOBALSEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::GLOBALZEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::GLOBALAEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::LOCALLOAD64);
- ExpandCaseToPackedTypes(AMDIL::LOCALSEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::LOCALZEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::LOCALAEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::REGIONLOAD64);
- ExpandCaseToPackedTypes(AMDIL::REGIONSEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::REGIONZEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::REGIONAEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::PRIVATELOAD64);
- ExpandCaseToPackedTypes(AMDIL::PRIVATESEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::PRIVATEZEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::PRIVATEAEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTSEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTAEXTLOAD64);
- ExpandCaseToPackedTypes(AMDIL::CONSTANTZEXTLOAD64);
- ExpandCaseToAllTruncTypes(AMDIL::GLOBALTRUNCSTORE64)
- ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE64);
- ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE64);
- ExpandCaseToAllTruncTypes(AMDIL::REGIONTRUNCSTORE64);
- ExpandCaseToPackedTypes(AMDIL::GLOBALSTORE64);
- ExpandCaseToPackedTypes(AMDIL::PRIVATESTORE64);
- ExpandCaseToPackedTypes(AMDIL::LOCALSTORE64);
- ExpandCaseToPackedTypes(AMDIL::REGIONSTORE64);
- return true;
- }
- return false;
-}
-
-bool
-AMDILIOExpansion::isStaticCPLoad(MachineInstr *MI)
+AMDILIOExpansionImpl::isStaticCPLoad(MachineInstr *MI)
{
- if (isConstantPoolInst(TM, MI) && isLoadInst(TM, MI)) {
+ if (isConstantPoolInst(MI) && isPtrLoadInst(MI)) {
uint32_t x = 0;
uint32_t num = MI->getNumOperands();
for (x = 0; x < num; ++x) {
@@ -307,9 +191,8 @@
}
return false;
}
-
bool
-AMDILIOExpansion::isNbitType(Type *mType, uint32_t nBits, bool isScalar)
+AMDILIOExpansionImpl::isNbitType(Type *mType, uint32_t nBits, bool isScalar)
{
if (!mType) {
return false;
@@ -336,334 +219,35 @@
return false;
}
}
-
bool
-AMDILIOExpansion::isHardwareInst(MachineInstr *MI)
+AMDILIOExpansionImpl::isHardwareInst(MachineInstr *MI)
{
AMDILAS::InstrResEnc curInst;
getAsmPrinterFlags(MI, curInst);
return curInst.bits.HardwareInst;
}
-
-uint32_t
-AMDILIOExpansion::getDataReg(MachineInstr *MI)
-{
- REG_PACKED_TYPE id = getPackedID(MI);
- switch (getMemorySize(MI)) {
- default:
- return AMDIL::R1011;
- case 4:
- if (id == UNPACK_V4I8
- || id == PACK_V4I8) {
- return AMDIL::R1011;
- } else if (id == UNPACK_V2I16
- || id == PACK_V2I16) {
- return AMDIL::Rxy1011;
- }
- case 2:
- if (id == UNPACK_V2I8
- || id == PACK_V2I8) {
- return AMDIL::Rxy1011;
- }
- case 1:
- return AMDIL::Rx1011;
- case 8:
- if (id == UNPACK_V4I16
- || id == PACK_V4I16) {
- return AMDIL::R1011;
- }
- return AMDIL::Rxy1011;
- }
-}
-
REG_PACKED_TYPE
-AMDILIOExpansion::getPackedID(MachineInstr *MI)
+AMDILIOExpansionImpl::getPackedID(MachineInstr *MI)
{
- switch (MI->getOpcode()) {
- default:
- break;
- case AMDIL::GLOBALTRUNCSTORE64_v2i64i8:
- case AMDIL::REGIONTRUNCSTORE64_v2i64i8:
- case AMDIL::LOCALTRUNCSTORE64_v2i64i8:
- case AMDIL::PRIVATETRUNCSTORE64_v2i64i8:
- case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
- case AMDIL::REGIONTRUNCSTORE_v2i64i8:
- case AMDIL::LOCALTRUNCSTORE_v2i64i8:
- case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
- case AMDIL::GLOBALTRUNCSTORE64_v2i32i8:
- case AMDIL::REGIONTRUNCSTORE64_v2i32i8:
- case AMDIL::LOCALTRUNCSTORE64_v2i32i8:
- case AMDIL::PRIVATETRUNCSTORE64_v2i32i8:
- case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
- case AMDIL::REGIONTRUNCSTORE_v2i32i8:
- case AMDIL::LOCALTRUNCSTORE_v2i32i8:
- case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
- case AMDIL::GLOBALTRUNCSTORE64_v2i16i8:
- case AMDIL::REGIONTRUNCSTORE64_v2i16i8:
- case AMDIL::LOCALTRUNCSTORE64_v2i16i8:
- case AMDIL::PRIVATETRUNCSTORE64_v2i16i8:
- case AMDIL::GLOBALSTORE64_v2i8:
- case AMDIL::LOCALSTORE64_v2i8:
- case AMDIL::REGIONSTORE64_v2i8:
- case AMDIL::PRIVATESTORE64_v2i8:
- case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
- case AMDIL::REGIONTRUNCSTORE_v2i16i8:
- case AMDIL::LOCALTRUNCSTORE_v2i16i8:
- case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
- case AMDIL::GLOBALSTORE_v2i8:
- case AMDIL::LOCALSTORE_v2i8:
- case AMDIL::REGIONSTORE_v2i8:
- case AMDIL::PRIVATESTORE_v2i8:
- return PACK_V2I8;
- case AMDIL::GLOBALTRUNCSTORE64_v4i32i8:
- case AMDIL::REGIONTRUNCSTORE64_v4i32i8:
- case AMDIL::LOCALTRUNCSTORE64_v4i32i8:
- case AMDIL::PRIVATETRUNCSTORE64_v4i32i8:
- case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
- case AMDIL::REGIONTRUNCSTORE_v4i32i8:
- case AMDIL::LOCALTRUNCSTORE_v4i32i8:
- case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
- case AMDIL::GLOBALTRUNCSTORE64_v4i16i8:
- case AMDIL::REGIONTRUNCSTORE64_v4i16i8:
- case AMDIL::LOCALTRUNCSTORE64_v4i16i8:
- case AMDIL::PRIVATETRUNCSTORE64_v4i16i8:
- case AMDIL::GLOBALSTORE64_v4i8:
- case AMDIL::LOCALSTORE64_v4i8:
- case AMDIL::REGIONSTORE64_v4i8:
- case AMDIL::PRIVATESTORE64_v4i8:
- case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
- case AMDIL::REGIONTRUNCSTORE_v4i16i8:
- case AMDIL::LOCALTRUNCSTORE_v4i16i8:
- case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
- case AMDIL::GLOBALSTORE_v4i8:
- case AMDIL::LOCALSTORE_v4i8:
- case AMDIL::REGIONSTORE_v4i8:
- case AMDIL::PRIVATESTORE_v4i8:
- return PACK_V4I8;
- case AMDIL::GLOBALTRUNCSTORE64_v2i64i16:
- case AMDIL::REGIONTRUNCSTORE64_v2i64i16:
- case AMDIL::LOCALTRUNCSTORE64_v2i64i16:
- case AMDIL::PRIVATETRUNCSTORE64_v2i64i16:
- case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
- case AMDIL::REGIONTRUNCSTORE_v2i64i16:
- case AMDIL::LOCALTRUNCSTORE_v2i64i16:
- case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
- case AMDIL::GLOBALTRUNCSTORE64_v2i32i16:
- case AMDIL::REGIONTRUNCSTORE64_v2i32i16:
- case AMDIL::LOCALTRUNCSTORE64_v2i32i16:
- case AMDIL::PRIVATETRUNCSTORE64_v2i32i16:
- case AMDIL::GLOBALSTORE64_v2i16:
- case AMDIL::LOCALSTORE64_v2i16:
- case AMDIL::REGIONSTORE64_v2i16:
- case AMDIL::PRIVATESTORE64_v2i16:
- case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
- case AMDIL::REGIONTRUNCSTORE_v2i32i16:
- case AMDIL::LOCALTRUNCSTORE_v2i32i16:
- case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
- case AMDIL::GLOBALSTORE_v2i16:
- case AMDIL::LOCALSTORE_v2i16:
- case AMDIL::REGIONSTORE_v2i16:
- case AMDIL::PRIVATESTORE_v2i16:
- return PACK_V2I16;
- case AMDIL::GLOBALTRUNCSTORE64_v4i32i16:
- case AMDIL::REGIONTRUNCSTORE64_v4i32i16:
- case AMDIL::LOCALTRUNCSTORE64_v4i32i16:
- case AMDIL::PRIVATETRUNCSTORE64_v4i32i16:
- case AMDIL::GLOBALSTORE64_v4i16:
- case AMDIL::LOCALSTORE64_v4i16:
- case AMDIL::REGIONSTORE64_v4i16:
- case AMDIL::PRIVATESTORE64_v4i16:
- case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
- case AMDIL::REGIONTRUNCSTORE_v4i32i16:
- case AMDIL::LOCALTRUNCSTORE_v4i32i16:
- case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
- case AMDIL::GLOBALSTORE_v4i16:
- case AMDIL::LOCALSTORE_v4i16:
- case AMDIL::REGIONSTORE_v4i16:
- case AMDIL::PRIVATESTORE_v4i16:
- return PACK_V4I16;
-
- case AMDIL::GLOBALLOAD64_v2i8:
- case AMDIL::GLOBALSEXTLOAD64_v2i8:
- case AMDIL::GLOBALAEXTLOAD64_v2i8:
- case AMDIL::GLOBALZEXTLOAD64_v2i8:
- case AMDIL::LOCALLOAD64_v2i8:
- case AMDIL::LOCALSEXTLOAD64_v2i8:
- case AMDIL::LOCALAEXTLOAD64_v2i8:
- case AMDIL::LOCALZEXTLOAD64_v2i8:
- case AMDIL::REGIONLOAD64_v2i8:
- case AMDIL::REGIONSEXTLOAD64_v2i8:
- case AMDIL::REGIONAEXTLOAD64_v2i8:
- case AMDIL::REGIONZEXTLOAD64_v2i8:
- case AMDIL::PRIVATELOAD64_v2i8:
- case AMDIL::PRIVATESEXTLOAD64_v2i8:
- case AMDIL::PRIVATEAEXTLOAD64_v2i8:
- case AMDIL::PRIVATEZEXTLOAD64_v2i8:
- case AMDIL::CONSTANTLOAD64_v2i8:
- case AMDIL::CONSTANTSEXTLOAD64_v2i8:
- case AMDIL::CONSTANTAEXTLOAD64_v2i8:
- case AMDIL::CONSTANTZEXTLOAD64_v2i8:
- case AMDIL::GLOBALLOAD_v2i8:
- case AMDIL::GLOBALSEXTLOAD_v2i8:
- case AMDIL::GLOBALAEXTLOAD_v2i8:
- case AMDIL::GLOBALZEXTLOAD_v2i8:
- case AMDIL::LOCALLOAD_v2i8:
- case AMDIL::LOCALSEXTLOAD_v2i8:
- case AMDIL::LOCALAEXTLOAD_v2i8:
- case AMDIL::LOCALZEXTLOAD_v2i8:
- case AMDIL::REGIONLOAD_v2i8:
- case AMDIL::REGIONSEXTLOAD_v2i8:
- case AMDIL::REGIONAEXTLOAD_v2i8:
- case AMDIL::REGIONZEXTLOAD_v2i8:
- case AMDIL::PRIVATELOAD_v2i8:
- case AMDIL::PRIVATESEXTLOAD_v2i8:
- case AMDIL::PRIVATEAEXTLOAD_v2i8:
- case AMDIL::PRIVATEZEXTLOAD_v2i8:
- case AMDIL::CONSTANTLOAD_v2i8:
- case AMDIL::CONSTANTSEXTLOAD_v2i8:
- case AMDIL::CONSTANTAEXTLOAD_v2i8:
- case AMDIL::CONSTANTZEXTLOAD_v2i8:
- return UNPACK_V2I8;
-
- case AMDIL::GLOBALLOAD64_v4i8:
- case AMDIL::GLOBALSEXTLOAD64_v4i8:
- case AMDIL::GLOBALAEXTLOAD64_v4i8:
- case AMDIL::GLOBALZEXTLOAD64_v4i8:
- case AMDIL::LOCALLOAD64_v4i8:
- case AMDIL::LOCALSEXTLOAD64_v4i8:
- case AMDIL::LOCALAEXTLOAD64_v4i8:
- case AMDIL::LOCALZEXTLOAD64_v4i8:
- case AMDIL::REGIONLOAD64_v4i8:
- case AMDIL::REGIONSEXTLOAD64_v4i8:
- case AMDIL::REGIONAEXTLOAD64_v4i8:
- case AMDIL::REGIONZEXTLOAD64_v4i8:
- case AMDIL::PRIVATELOAD64_v4i8:
- case AMDIL::PRIVATESEXTLOAD64_v4i8:
- case AMDIL::PRIVATEAEXTLOAD64_v4i8:
- case AMDIL::PRIVATEZEXTLOAD64_v4i8:
- case AMDIL::CONSTANTLOAD64_v4i8:
- case AMDIL::CONSTANTSEXTLOAD64_v4i8:
- case AMDIL::CONSTANTAEXTLOAD64_v4i8:
- case AMDIL::CONSTANTZEXTLOAD64_v4i8:
- case AMDIL::GLOBALLOAD_v4i8:
- case AMDIL::GLOBALSEXTLOAD_v4i8:
- case AMDIL::GLOBALAEXTLOAD_v4i8:
- case AMDIL::GLOBALZEXTLOAD_v4i8:
- case AMDIL::LOCALLOAD_v4i8:
- case AMDIL::LOCALSEXTLOAD_v4i8:
- case AMDIL::LOCALAEXTLOAD_v4i8:
- case AMDIL::LOCALZEXTLOAD_v4i8:
- case AMDIL::REGIONLOAD_v4i8:
- case AMDIL::REGIONSEXTLOAD_v4i8:
- case AMDIL::REGIONAEXTLOAD_v4i8:
- case AMDIL::REGIONZEXTLOAD_v4i8:
- case AMDIL::PRIVATELOAD_v4i8:
- case AMDIL::PRIVATESEXTLOAD_v4i8:
- case AMDIL::PRIVATEAEXTLOAD_v4i8:
- case AMDIL::PRIVATEZEXTLOAD_v4i8:
- case AMDIL::CONSTANTLOAD_v4i8:
- case AMDIL::CONSTANTSEXTLOAD_v4i8:
- case AMDIL::CONSTANTAEXTLOAD_v4i8:
- case AMDIL::CONSTANTZEXTLOAD_v4i8:
- return UNPACK_V4I8;
-
- case AMDIL::GLOBALLOAD64_v2i16:
- case AMDIL::GLOBALSEXTLOAD64_v2i16:
- case AMDIL::GLOBALAEXTLOAD64_v2i16:
- case AMDIL::GLOBALZEXTLOAD64_v2i16:
- case AMDIL::LOCALLOAD64_v2i16:
- case AMDIL::LOCALSEXTLOAD64_v2i16:
- case AMDIL::LOCALAEXTLOAD64_v2i16:
- case AMDIL::LOCALZEXTLOAD64_v2i16:
- case AMDIL::REGIONLOAD64_v2i16:
- case AMDIL::REGIONSEXTLOAD64_v2i16:
- case AMDIL::REGIONAEXTLOAD64_v2i16:
- case AMDIL::REGIONZEXTLOAD64_v2i16:
- case AMDIL::PRIVATELOAD64_v2i16:
- case AMDIL::PRIVATESEXTLOAD64_v2i16:
- case AMDIL::PRIVATEAEXTLOAD64_v2i16:
- case AMDIL::PRIVATEZEXTLOAD64_v2i16:
- case AMDIL::CONSTANTLOAD64_v2i16:
- case AMDIL::CONSTANTSEXTLOAD64_v2i16:
- case AMDIL::CONSTANTAEXTLOAD64_v2i16:
- case AMDIL::CONSTANTZEXTLOAD64_v2i16:
- case AMDIL::GLOBALLOAD_v2i16:
- case AMDIL::GLOBALSEXTLOAD_v2i16:
- case AMDIL::GLOBALAEXTLOAD_v2i16:
- case AMDIL::GLOBALZEXTLOAD_v2i16:
- case AMDIL::LOCALLOAD_v2i16:
- case AMDIL::LOCALSEXTLOAD_v2i16:
- case AMDIL::LOCALAEXTLOAD_v2i16:
- case AMDIL::LOCALZEXTLOAD_v2i16:
- case AMDIL::REGIONLOAD_v2i16:
- case AMDIL::REGIONSEXTLOAD_v2i16:
- case AMDIL::REGIONAEXTLOAD_v2i16:
- case AMDIL::REGIONZEXTLOAD_v2i16:
- case AMDIL::PRIVATELOAD_v2i16:
- case AMDIL::PRIVATESEXTLOAD_v2i16:
- case AMDIL::PRIVATEAEXTLOAD_v2i16:
- case AMDIL::PRIVATEZEXTLOAD_v2i16:
- case AMDIL::CONSTANTLOAD_v2i16:
- case AMDIL::CONSTANTSEXTLOAD_v2i16:
- case AMDIL::CONSTANTAEXTLOAD_v2i16:
- case AMDIL::CONSTANTZEXTLOAD_v2i16:
- return UNPACK_V2I16;
-
- case AMDIL::GLOBALLOAD64_v4i16:
- case AMDIL::GLOBALSEXTLOAD64_v4i16:
- case AMDIL::GLOBALAEXTLOAD64_v4i16:
- case AMDIL::GLOBALZEXTLOAD64_v4i16:
- case AMDIL::LOCALLOAD64_v4i16:
- case AMDIL::LOCALSEXTLOAD64_v4i16:
- case AMDIL::LOCALAEXTLOAD64_v4i16:
- case AMDIL::LOCALZEXTLOAD64_v4i16:
- case AMDIL::REGIONLOAD64_v4i16:
- case AMDIL::REGIONSEXTLOAD64_v4i16:
- case AMDIL::REGIONAEXTLOAD64_v4i16:
- case AMDIL::REGIONZEXTLOAD64_v4i16:
- case AMDIL::PRIVATELOAD64_v4i16:
- case AMDIL::PRIVATESEXTLOAD64_v4i16:
- case AMDIL::PRIVATEAEXTLOAD64_v4i16:
- case AMDIL::PRIVATEZEXTLOAD64_v4i16:
- case AMDIL::CONSTANTLOAD64_v4i16:
- case AMDIL::CONSTANTSEXTLOAD64_v4i16:
- case AMDIL::CONSTANTAEXTLOAD64_v4i16:
- case AMDIL::CONSTANTZEXTLOAD64_v4i16:
- case AMDIL::GLOBALLOAD_v4i16:
- case AMDIL::GLOBALSEXTLOAD_v4i16:
- case AMDIL::GLOBALAEXTLOAD_v4i16:
- case AMDIL::GLOBALZEXTLOAD_v4i16:
- case AMDIL::LOCALLOAD_v4i16:
- case AMDIL::LOCALSEXTLOAD_v4i16:
- case AMDIL::LOCALAEXTLOAD_v4i16:
- case AMDIL::LOCALZEXTLOAD_v4i16:
- case AMDIL::REGIONLOAD_v4i16:
- case AMDIL::REGIONSEXTLOAD_v4i16:
- case AMDIL::REGIONAEXTLOAD_v4i16:
- case AMDIL::REGIONZEXTLOAD_v4i16:
- case AMDIL::PRIVATELOAD_v4i16:
- case AMDIL::PRIVATESEXTLOAD_v4i16:
- case AMDIL::PRIVATEAEXTLOAD_v4i16:
- case AMDIL::PRIVATEZEXTLOAD_v4i16:
- case AMDIL::CONSTANTLOAD_v4i16:
- case AMDIL::CONSTANTSEXTLOAD_v4i16:
- case AMDIL::CONSTANTAEXTLOAD_v4i16:
- case AMDIL::CONSTANTZEXTLOAD_v4i16:
- return UNPACK_V4I16;
- };
+ if (isPackV2I8Inst(MI)) return PACK_V2I8;
+ if (isPackV4I8Inst(MI)) return PACK_V4I8;
+ if (isPackV2I16Inst(MI)) return PACK_V2I16;
+ if (isPackV4I16Inst(MI)) return PACK_V4I16;
+ if (isUnpackV2I8Inst(MI)) return UNPACK_V2I8;
+ if (isUnpackV4I8Inst(MI)) return UNPACK_V4I8;
+ if (isUnpackV2I16Inst(MI)) return UNPACK_V2I16;
+ if (isUnpackV4I16Inst(MI)) return UNPACK_V4I16;
return NO_PACKING;
}
-
uint32_t
-AMDILIOExpansion::getPointerID(MachineInstr *MI)
+AMDILIOExpansionImpl::getPointerID(MachineInstr *MI)
{
AMDILAS::InstrResEnc curInst;
getAsmPrinterFlags(MI, curInst);
return curInst.bits.ResourceID;
}
-
uint32_t
-AMDILIOExpansion::getShiftSize(MachineInstr *MI)
+AMDILIOExpansionImpl::getShiftSize(MachineInstr *MI)
{
switch(getPackedID(MI)) {
default:
@@ -682,17 +266,19 @@
return 0;
}
uint32_t
-AMDILIOExpansion::getMemorySize(MachineInstr *MI)
+AMDILIOExpansionImpl::getMemorySize(MachineInstr *MI)
{
if (MI->memoperands_empty()) {
return 4;
}
return (uint32_t)((*MI->memoperands_begin())->getSize());
}
-
-unsigned
-AMDILIOExpansion::expandLongExtend(MachineInstr *MI,
- uint32_t numComps, uint32_t size, bool signedShift)
+void
+AMDILIOExpansionImpl::expandLongExtend(MachineInstr *MI,
+ uint32_t numComps,
+ uint32_t size,
+ bool signedShift,
+ uint32_t &dataReg)
{
DebugLoc DL = MI->getDebugLoc();
switch(size) {
@@ -701,30 +287,58 @@
break;
case 8:
if (numComps == 1) {
- return expandLongExtendSub32(MI, AMDIL::SHL_i8, AMDIL::SHRVEC_v2i32,
- AMDIL::USHRVEC_i8,
- 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE, signedShift,
- false);
+ expandLongExtendSub32(MI,
+ AMDIL::SHLi8i32rr,
+ AMDIL::SHRv2i32i32rr,
+ AMDIL::USHRi8i32rr,
+ 24,
+ (24ULL | (31ULL << 32)),
+ 24,
+ AMDIL::LCREATEi64rr,
+ signedShift,
+ false,
+ dataReg);
} else if (numComps == 2) {
- return expandLongExtendSub32(MI, AMDIL::SHL_v2i8, AMDIL::SHRVEC_v4i32,
- AMDIL::USHRVEC_v2i8,
- 24, (24ULL | (31ULL << 32)), 24, AMDIL::LCREATE_v2i64, signedShift,
- true);
+ expandLongExtendSub32(MI,
+ AMDIL::SHLv2i8i32rr,
+ AMDIL::SHRv4i32i32rr,
+ AMDIL::USHRv2i8i32rr,
+ 24,
+ (24ULL | (31ULL << 32)),
+ 24,
+ AMDIL::LCREATEv2i64rr,
+ signedShift,
+ true,
+ dataReg);
} else {
assert(0 && "Found a case we don't handle!");
}
break;
case 16:
if (numComps == 1) {
- return expandLongExtendSub32(MI, AMDIL::SHL_i16, AMDIL::SHRVEC_v2i32,
- AMDIL::USHRVEC_i16,
- 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE, signedShift,
- false);
+ expandLongExtendSub32(MI,
+ AMDIL::SHLi16i32rr,
+ AMDIL::SHRv2i32i32rr,
+ AMDIL::USHRi16i32rr,
+ 16,
+ (16ULL | (31ULL << 32)),
+ 16,
+ AMDIL::LCREATEi64rr,
+ signedShift,
+ false,
+ dataReg);
} else if (numComps == 2) {
- return expandLongExtendSub32(MI, AMDIL::SHL_v2i16, AMDIL::SHRVEC_v4i32,
- AMDIL::USHRVEC_v2i16,
- 16, (16ULL | (31ULL << 32)), 16, AMDIL::LCREATE_v2i64, signedShift,
- true);
+ expandLongExtendSub32(MI,
+ AMDIL::SHLv2i16i32rr,
+ AMDIL::SHRv4i32i32rr,
+ AMDIL::USHRv2i16i32rr,
+ 16,
+ (16ULL | (31ULL << 32)),
+ 16,
+ AMDIL::LCREATEv2i64rr,
+ signedShift,
+ true,
+ dataReg);
} else {
assert(0 && "Found a case we don't handle!");
}
@@ -733,73 +347,78 @@
if (numComps == 1) {
MachineInstr *nMI = NULL;
if (signedShift) {
- nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_i32), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1011)
+ nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRi32i32rr), dataReg)
+ .addReg(getCompReg(dataReg, sub_x_comp, sub_z_comp))
.addImm(mMFI->addi64Literal((0ULL | (31ULL << 32))));
} else {
- nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1011)
+ nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEi64rr), dataReg)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0));
}
- return nMI->getOperand(0).getReg();
} else if (numComps == 2) {
MachineInstr *nMI = NULL;
if (signedShift) {
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRVEC_v2i32), AMDIL::Rxy1012)
- .addReg(AMDIL::Rxy1011)
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::SHRv2i32i32rr), AMDIL::Rxy1012)
+ .addReg(getCompReg(dataReg, sub_xy_comp, sub_zw_comp))
.addImm(mMFI->addi64Literal(31));
- nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
- .addReg(AMDIL::Rxy1011)
+ nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEv2i64rr), dataReg)
+ .addReg(dataReg)
.addReg(AMDIL::Rxy1012);
} else {
- nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATE_v2i64), AMDIL::R1011)
- .addReg(AMDIL::Rxy1011)
+ nMI = BuildMI(*mBB, MI, DL, mTII->get(AMDIL::LCREATEv2i64rr), dataReg)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0));
}
- return nMI->getOperand(0).getReg();
} else {
assert(0 && "Found a case we don't handle!");
}
};
- return 0;
}
-unsigned
-AMDILIOExpansion::expandLongExtendSub32(MachineInstr *MI,
- unsigned SHLop, unsigned SHRop, unsigned USHRop,
- unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
- unsigned LCRop, bool signedShift, bool vec2)
+void
+AMDILIOExpansionImpl::expandLongExtendSub32(MachineInstr *MI,
+ unsigned SHLop,
+ unsigned SHRop,
+ unsigned USHRop,
+ unsigned SHLimm,
+ uint64_t SHRimm,
+ unsigned USHRimm,
+ unsigned LCRop,
+ bool signedShift,
+ bool vec2,
+ uint32_t &dataReg)
{
MachineInstr *nMI = NULL;
DebugLoc DL = MI->getDebugLoc();
+ unsigned origReg = dataReg;
BuildMI(*mBB, MI, DL, mTII->get(SHLop),
(vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
- .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(SHLimm));
+ dataReg = (vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011;
if (signedShift) {
BuildMI(*mBB, MI, DL, mTII->get(LCRop),
(vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
- .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
- .addReg(AMDIL::Rxy1011);
+ .addReg(dataReg).addReg(dataReg);
+ dataReg = (vec2) ? AMDIL::R1011 : AMDIL::Rxy1011;
nMI = BuildMI(*mBB, MI, DL, mTII->get(SHRop),
- (vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
- .addReg((vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
+ origReg).addReg(dataReg)
.addImm(mMFI->addi64Literal(SHRimm));
} else {
BuildMI(*mBB, MI, DL, mTII->get(USHRop),
- (vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
- .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+ dataReg).addReg(dataReg)
.addImm(mMFI->addi32Literal(USHRimm));
nMI = BuildMI(*mBB, MI, MI->getDebugLoc(), mTII->get(LCRop),
- (vec2) ? AMDIL::R1011 : AMDIL::Rxy1011)
- .addReg((vec2) ? AMDIL::Rxy1011 : AMDIL::Rx1011)
+ origReg)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0));
}
- return nMI->getOperand(0).getReg();
}
-
-unsigned
-AMDILIOExpansion::expandIntegerExtend(MachineInstr *MI, unsigned SHLop,
- unsigned SHRop, unsigned offset, unsigned reg)
+void
+AMDILIOExpansionImpl::expandIntegerExtend(MachineInstr *MI,
+ unsigned SHLop,
+ unsigned SHRop,
+ unsigned offset,
+ unsigned reg)
{
DebugLoc DL = MI->getDebugLoc();
offset = mMFI->addi32Literal(offset);
@@ -809,13 +428,12 @@
BuildMI(*mBB, MI, DL,
mTII->get(SHRop), reg)
.addReg(reg).addImm(offset);
- return reg;
}
-unsigned
-AMDILIOExpansion::expandExtendLoad(MachineInstr *MI)
+void
+AMDILIOExpansionImpl::expandExtendLoad(MachineInstr *MI, uint32_t &dataReg)
{
if (!isExtendLoad(MI)) {
- return 0;
+ return;
}
Type *mType = NULL;
if (!MI->memoperands_empty()) {
@@ -825,7 +443,7 @@
}
unsigned opcode = 0;
DebugLoc DL = MI->getDebugLoc();
- if (isZExtLoadInst(TM, MI) || isAExtLoadInst(TM, MI) || isSExtLoadInst(TM, MI)) {
+ if (isExtLoadInst(MI)) {
switch(MI->getDesc().OpInfo[0].RegClass) {
default:
assert(0 && "Found an extending load that we don't handle!");
@@ -833,69 +451,72 @@
case AMDIL::GPRI16RegClassID:
if (!isHardwareLocal(MI)
|| mSTM->device()->usesSoftware(AMDILDeviceInfo::ByteLDSOps)) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i16 : AMDIL::USHRVEC_i16;
- return expandIntegerExtend(MI, AMDIL::SHL_i16, opcode, 24, AMDIL::Rx1011);
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRi16i32rr : AMDIL::USHRi16i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLi16i32rr, opcode, 24, dataReg);
}
break;
case AMDIL::GPRV2I16RegClassID:
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i16 : AMDIL::USHRVEC_v2i16;
- return expandIntegerExtend(MI, AMDIL::SHL_v2i16, opcode, 24, AMDIL::Rxy1011);
+ opcode =
+ isSExtLoadInst(MI) ? AMDIL::SHRv2i16i32rr : AMDIL::USHRv2i16i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv2i16i32rr, opcode, 24, dataReg);
break;
case AMDIL::GPRV4I8RegClassID:
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i8 : AMDIL::USHRVEC_v4i8;
- return expandIntegerExtend(MI, AMDIL::SHL_v4i8, opcode, 24, AMDIL::R1011);
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRv4i8i32rr : AMDIL::USHRv4i8i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv4i8i32rr, opcode, 24, dataReg);
break;
case AMDIL::GPRV4I16RegClassID:
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i16 : AMDIL::USHRVEC_v4i16;
- return expandIntegerExtend(MI, AMDIL::SHL_v4i16, opcode, 24, AMDIL::R1011);
+ opcode =
+ isSExtLoadInst(MI) ? AMDIL::SHRv4i16i32rr : AMDIL::USHRv4i16i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv4i16i32rr, opcode, 24, dataReg);
break;
case AMDIL::GPRI32RegClassID:
// We can be a i8 or i16 bit sign extended value
if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
- expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 24, AMDIL::Rx1011);
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRi32i32rr : AMDIL::USHRi32i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLi32i32rr, opcode, 24, dataReg);
} else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_i32 : AMDIL::USHRVEC_i32;
- expandIntegerExtend(MI, AMDIL::SHL_i32, opcode, 16, AMDIL::Rx1011);
+ opcode = isSExtLoadInst(MI) ? AMDIL::SHRi32i32rr : AMDIL::USHRi32i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLi32i32rr, opcode, 16, dataReg);
} else {
assert(0 && "Found an extending load that we don't handle!");
}
- return AMDIL::Rx1011;
break;
case AMDIL::GPRV2I32RegClassID:
// We can be a v2i8 or v2i16 bit sign extended value
if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
- expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 24, AMDIL::Rxy1011);
+ opcode =
+ isSExtLoadInst(MI) ? AMDIL::SHRv2i32i32rr : AMDIL::USHRv2i32i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv2i32i32rr, opcode, 24, dataReg);
} else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v2i32 : AMDIL::USHRVEC_v2i32;
- expandIntegerExtend(MI, AMDIL::SHL_v2i32, opcode, 16, AMDIL::Rxy1011);
+ opcode =
+ isSExtLoadInst(MI) ? AMDIL::SHRv2i32i32rr : AMDIL::USHRv2i32i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv2i32i32rr, opcode, 16, dataReg);
} else {
assert(0 && "Found an extending load that we don't handle!");
}
- return AMDIL::Rxy1011;
break;
case AMDIL::GPRV4I32RegClassID:
// We can be a v4i8 or v4i16 bit sign extended value
if (isNbitType(mType, 8, false) || getMemorySize(MI) == 4) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
- expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 24, AMDIL::R1011);
+ opcode =
+ isSExtLoadInst(MI) ? AMDIL::SHRv4i32i32rr : AMDIL::USHRv4i32i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv4i32i32rr, opcode, 24, dataReg);
} else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 8) {
- opcode = isSExtLoadInst(TM, MI) ? AMDIL::SHRVEC_v4i32 : AMDIL::USHRVEC_v4i32;
- expandIntegerExtend(MI, AMDIL::SHL_v4i32, opcode, 16, AMDIL::R1011);
+ opcode =
+ isSExtLoadInst(MI) ? AMDIL::SHRv4i32i32rr : AMDIL::USHRv4i32i32rr;
+ expandIntegerExtend(MI, AMDIL::SHLv4i32i32rr, opcode, 16, dataReg);
} else {
assert(0 && "Found an extending load that we don't handle!");
}
- return AMDIL::R1011;
break;
case AMDIL::GPRI64RegClassID:
// We can be a i8, i16 or i32 bit sign extended value
if (isNbitType(mType, 8) || getMemorySize(MI) == 1) {
- return expandLongExtend(MI, 1, 8, isSExtLoadInst(TM, MI));
+ expandLongExtend(MI, 1, 8, isSExtLoadInst(MI), dataReg);
} else if (isNbitType(mType, 16) || getMemorySize(MI) == 2) {
- return expandLongExtend(MI, 1, 16, isSExtLoadInst(TM, MI));
+ expandLongExtend(MI, 1, 16, isSExtLoadInst(MI), dataReg);
} else if (isNbitType(mType, 32) || getMemorySize(MI) == 4) {
- return expandLongExtend(MI, 1, 32, isSExtLoadInst(TM, MI));
+ expandLongExtend(MI, 1, 32, isSExtLoadInst(MI), dataReg);
} else {
assert(0 && "Found an extending load that we don't handle!");
}
@@ -903,52 +524,59 @@
case AMDIL::GPRV2I64RegClassID:
// We can be a v2i8, v2i16 or v2i32 bit sign extended value
if (isNbitType(mType, 8, false) || getMemorySize(MI) == 2) {
- return expandLongExtend(MI, 2, 8, isSExtLoadInst(TM, MI));
+ expandLongExtend(MI, 2, 8, isSExtLoadInst(MI), dataReg);
} else if (isNbitType(mType, 16, false) || getMemorySize(MI) == 4) {
- return expandLongExtend(MI, 2, 16, isSExtLoadInst(TM, MI));
+ expandLongExtend(MI, 2, 16, isSExtLoadInst(MI), dataReg);
} else if (isNbitType(mType, 32, false) || getMemorySize(MI) == 8) {
- return expandLongExtend(MI, 2, 32, isSExtLoadInst(TM, MI));
+ expandLongExtend(MI, 2, 32, isSExtLoadInst(MI), dataReg);
} else {
assert(0 && "Found an extending load that we don't handle!");
}
break;
case AMDIL::GPRF32RegClassID:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::HTOF_f32), AMDIL::Rx1011)
- .addReg(AMDIL::Rx1011);
- return AMDIL::Rx1011;
+ mTII->get(AMDIL::HTOFf32r), dataReg)
+ .addReg(dataReg);
+ break;
case AMDIL::GPRV2F32RegClassID:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::HTOF_v2f32), AMDIL::Rxy1011)
- .addReg(AMDIL::Rxy1011);
- return AMDIL::Rxy1011;
+ mTII->get(AMDIL::HTOFv2f32r), dataReg)
+ .addReg(dataReg);
+ break;
case AMDIL::GPRV4F32RegClassID:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::HTOF_v4f32), AMDIL::R1011)
- .addReg(AMDIL::R1011);
- return AMDIL::R1011;
+ mTII->get(AMDIL::HTOFv4f32r), dataReg)
+ .addReg(dataReg);
+ break;
case AMDIL::GPRF64RegClassID:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::FTOD), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1011);
- return AMDIL::Rxy1011;
+ mTII->get(AMDIL::FTODr), dataReg)
+ .addReg(dataReg);
+ break;
case AMDIL::GPRV2F64RegClassID:
- BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::FTOD), AMDIL::Rzw1011)
- .addReg(AMDIL::Ry1011);
- BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::FTOD), AMDIL::Rxy1011)
- .addReg(AMDIL::Rx1011);
- return AMDIL::R1011;
+ if (mTRI->getSubReg(dataReg, sub_xy_comp)) {
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTODr), getCompReg(dataReg, sub_zw_comp))
+ .addReg(getCompReg(dataReg, sub_y_comp));
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTODr), getCompReg(dataReg, sub_xy_comp))
+ .addReg(getCompReg(dataReg, sub_x_comp));
+ } else {
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTODr), getCompReg(dataReg, sub_xy_comp))
+ .addReg(getCompReg(dataReg, sub_z_comp));
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::FTODr), getCompReg(dataReg, sub_zw_comp))
+ .addReg(getCompReg(dataReg, sub_w_comp));
+ }
+ break;
}
}
- return 0;
}
-
void
-AMDILIOExpansion::expandTruncData(MachineInstr *MI)
+AMDILIOExpansionImpl::expandTruncData(MachineInstr *MI, uint32_t &dataReg)
{
- if (!isTruncStoreInst(TM, MI)) {
+ if (!isTruncStoreInst(MI)) {
return;
}
DebugLoc DL = MI->getDebugLoc();
@@ -957,280 +585,311 @@
MI->dump();
assert(!"Found a trunc store instructions we don't handle!");
break;
- case AMDIL::GLOBALTRUNCSTORE64_i64i8:
- case AMDIL::GLOBALTRUNCSTORE64_v2i64i8:
- case AMDIL::LOCALTRUNCSTORE64_i64i8:
- case AMDIL::LOCALTRUNCSTORE64_v2i64i8:
- case AMDIL::REGIONTRUNCSTORE64_i64i8:
- case AMDIL::REGIONTRUNCSTORE64_v2i64i8:
- case AMDIL::PRIVATETRUNCSTORE64_i64i8:
- case AMDIL::PRIVATETRUNCSTORE64_v2i64i8:
- case AMDIL::GLOBALTRUNCSTORE_i64i8:
- case AMDIL::GLOBALTRUNCSTORE_v2i64i8:
- case AMDIL::LOCALTRUNCSTORE_i64i8:
- case AMDIL::LOCALTRUNCSTORE_v2i64i8:
- case AMDIL::REGIONTRUNCSTORE_i64i8:
- case AMDIL::REGIONTRUNCSTORE_v2i64i8:
- case AMDIL::PRIVATETRUNCSTORE_i64i8:
- case AMDIL::PRIVATETRUNCSTORE_v2i64i8:
+ case AMDIL::GLOBALTRUNCSTORE64i64i8r: // case AMDIL::GLOBALTRUNCSTORE64i64i8i:
+ case AMDIL::LOCALTRUNCSTORE64i64i8r: // case AMDIL::LOCALTRUNCSTORE64i64i8i:
+ case AMDIL::REGIONTRUNCSTORE64i64i8r: // case AMDIL::REGIONTRUNCSTORE64i64i8i:
+ case AMDIL::PRIVATETRUNCSTORE64i64i8r: // case AMDIL::PRIVATETRUNCSTORE64i64i8i:
+ case AMDIL::GLOBALTRUNCSTOREi64i8r: // case AMDIL::GLOBALTRUNCSTOREi64i8i:
+ case AMDIL::LOCALTRUNCSTOREi64i8r: // case AMDIL::LOCALTRUNCSTOREi64i8i:
+ case AMDIL::REGIONTRUNCSTOREi64i8r: // case AMDIL::REGIONTRUNCSTOREi64i8i:
+ case AMDIL::PRIVATETRUNCSTOREi64i8r: // case AMDIL::PRIVATETRUNCSTOREi64i8i:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
- .addReg(AMDIL::R1011);
- case AMDIL::GLOBALTRUNCSTORE64_i16i8:
- case AMDIL::GLOBALTRUNCSTORE64_v2i16i8:
- case AMDIL::GLOBALTRUNCSTORE64_v4i16i8:
- case AMDIL::LOCALTRUNCSTORE64_i16i8:
- case AMDIL::LOCALTRUNCSTORE64_v2i16i8:
- case AMDIL::LOCALTRUNCSTORE64_v4i16i8:
- case AMDIL::REGIONTRUNCSTORE64_i16i8:
- case AMDIL::REGIONTRUNCSTORE64_v2i16i8:
- case AMDIL::REGIONTRUNCSTORE64_v4i16i8:
- case AMDIL::PRIVATETRUNCSTORE64_i16i8:
- case AMDIL::PRIVATETRUNCSTORE64_v2i16i8:
- case AMDIL::PRIVATETRUNCSTORE64_v4i16i8:
- case AMDIL::GLOBALTRUNCSTORE_i16i8:
- case AMDIL::GLOBALTRUNCSTORE_v2i16i8:
- case AMDIL::GLOBALTRUNCSTORE_v4i16i8:
- case AMDIL::LOCALTRUNCSTORE_i16i8:
- case AMDIL::LOCALTRUNCSTORE_v2i16i8:
- case AMDIL::LOCALTRUNCSTORE_v4i16i8:
- case AMDIL::REGIONTRUNCSTORE_i16i8:
- case AMDIL::REGIONTRUNCSTORE_v2i16i8:
- case AMDIL::REGIONTRUNCSTORE_v4i16i8:
- case AMDIL::PRIVATETRUNCSTORE_i16i8:
- case AMDIL::PRIVATETRUNCSTORE_v2i16i8:
- case AMDIL::PRIVATETRUNCSTORE_v4i16i8:
- case AMDIL::GLOBALTRUNCSTORE64_i32i8:
- case AMDIL::GLOBALTRUNCSTORE64_v2i32i8:
- case AMDIL::GLOBALTRUNCSTORE64_v4i32i8:
- case AMDIL::LOCALTRUNCSTORE64_i32i8:
- case AMDIL::LOCALTRUNCSTORE64_v2i32i8:
- case AMDIL::LOCALTRUNCSTORE64_v4i32i8:
- case AMDIL::REGIONTRUNCSTORE64_i32i8:
- case AMDIL::REGIONTRUNCSTORE64_v2i32i8:
- case AMDIL::REGIONTRUNCSTORE64_v4i32i8:
- case AMDIL::PRIVATETRUNCSTORE64_i32i8:
- case AMDIL::PRIVATETRUNCSTORE64_v2i32i8:
- case AMDIL::PRIVATETRUNCSTORE64_v4i32i8:
- case AMDIL::GLOBALTRUNCSTORE_i32i8:
- case AMDIL::GLOBALTRUNCSTORE_v2i32i8:
- case AMDIL::GLOBALTRUNCSTORE_v4i32i8:
- case AMDIL::LOCALTRUNCSTORE_i32i8:
- case AMDIL::LOCALTRUNCSTORE_v2i32i8:
- case AMDIL::LOCALTRUNCSTORE_v4i32i8:
- case AMDIL::REGIONTRUNCSTORE_i32i8:
- case AMDIL::REGIONTRUNCSTORE_v2i32i8:
- case AMDIL::REGIONTRUNCSTORE_v4i32i8:
- case AMDIL::PRIVATETRUNCSTORE_i32i8:
- case AMDIL::PRIVATETRUNCSTORE_v2i32i8:
- case AMDIL::PRIVATETRUNCSTORE_v4i32i8:
+ mTII->get(AMDIL::LLOi64r), AMDIL::Rx1011)
+ .addReg(dataReg);
+ dataReg = AMDIL::Rx1011;
+ case AMDIL::GLOBALTRUNCSTORE64i16i8r: // case AMDIL::GLOBALTRUNCSTORE64i16i8i:
+ case AMDIL::LOCALTRUNCSTORE64i16i8r: // case AMDIL::LOCALTRUNCSTORE64i16i8i:
+ case AMDIL::REGIONTRUNCSTORE64i16i8r: // case AMDIL::REGIONTRUNCSTORE64i16i8i:
+ case AMDIL::PRIVATETRUNCSTORE64i16i8r: // case AMDIL::PRIVATETRUNCSTORE64i16i8i:
+ case AMDIL::GLOBALTRUNCSTOREi16i8r: // case AMDIL::GLOBALTRUNCSTOREi16i8i:
+ case AMDIL::LOCALTRUNCSTOREi16i8r: // case AMDIL::LOCALTRUNCSTOREi16i8i:
+ case AMDIL::REGIONTRUNCSTOREi16i8r: // case AMDIL::REGIONTRUNCSTOREi16i8i:
+ case AMDIL::PRIVATETRUNCSTOREi16i8r: // case AMDIL::PRIVATETRUNCSTOREi16i8i:
+ case AMDIL::GLOBALTRUNCSTORE64i32i8r: // case AMDIL::GLOBALTRUNCSTORE64i32i8i:
+ case AMDIL::LOCALTRUNCSTORE64i32i8r: // case AMDIL::LOCALTRUNCSTORE64i32i8i:
+ case AMDIL::REGIONTRUNCSTORE64i32i8r: // case AMDIL::REGIONTRUNCSTORE64i32i8i:
+ case AMDIL::PRIVATETRUNCSTORE64i32i8r: // case AMDIL::PRIVATETRUNCSTORE64i32i8i:
+ case AMDIL::GLOBALTRUNCSTOREi32i8r: // case AMDIL::GLOBALTRUNCSTOREi32i8i:
+ case AMDIL::LOCALTRUNCSTOREi32i8r: // case AMDIL::LOCALTRUNCSTOREi32i8i:
+ case AMDIL::REGIONTRUNCSTOREi32i8r: // case AMDIL::REGIONTRUNCSTOREi32i8i:
+ case AMDIL::PRIVATETRUNCSTOREi32i8r: // case AMDIL::PRIVATETRUNCSTOREi32i8i:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
- .addReg(AMDIL::R1011)
+ mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
.addImm(mMFI->addi32Literal(0xFF));
+ dataReg = AMDIL::Rx1011;
break;
- case AMDIL::GLOBALTRUNCSTORE64_i64i16:
- case AMDIL::GLOBALTRUNCSTORE64_v2i64i16:
- case AMDIL::LOCALTRUNCSTORE64_i64i16:
- case AMDIL::LOCALTRUNCSTORE64_v2i64i16:
- case AMDIL::REGIONTRUNCSTORE64_i64i16:
- case AMDIL::REGIONTRUNCSTORE64_v2i64i16:
- case AMDIL::PRIVATETRUNCSTORE64_i64i16:
- case AMDIL::PRIVATETRUNCSTORE64_v2i64i16:
- case AMDIL::GLOBALTRUNCSTORE_i64i16:
- case AMDIL::GLOBALTRUNCSTORE_v2i64i16:
- case AMDIL::LOCALTRUNCSTORE_i64i16:
- case AMDIL::LOCALTRUNCSTORE_v2i64i16:
- case AMDIL::REGIONTRUNCSTORE_i64i16:
- case AMDIL::REGIONTRUNCSTORE_v2i64i16:
- case AMDIL::PRIVATETRUNCSTORE_i64i16:
- case AMDIL::PRIVATETRUNCSTORE_v2i64i16:
+ case AMDIL::GLOBALTRUNCSTORE64v2i64i8r: // case AMDIL::GLOBALTRUNCSTORE64v2i64i8i:
+ case AMDIL::LOCALTRUNCSTORE64v2i64i8r: // case AMDIL::LOCALTRUNCSTORE64v2i64i8i:
+ case AMDIL::REGIONTRUNCSTORE64v2i64i8r: // case AMDIL::REGIONTRUNCSTORE64v2i64i8i:
+ case AMDIL::PRIVATETRUNCSTORE64v2i64i8r: // case AMDIL::PRIVATETRUNCSTORE64v2i64i8i:
+ case AMDIL::GLOBALTRUNCSTOREv2i64i8r: // case AMDIL::GLOBALTRUNCSTOREv2i64i8i:
+ case AMDIL::LOCALTRUNCSTOREv2i64i8r: // case AMDIL::LOCALTRUNCSTOREv2i64i8i:
+ case AMDIL::REGIONTRUNCSTOREv2i64i8r: // case AMDIL::REGIONTRUNCSTOREv2i64i8i:
+ case AMDIL::PRIVATETRUNCSTOREv2i64i8r: // case AMDIL::PRIVATETRUNCSTOREv2i64i8i:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
- .addReg(AMDIL::R1011);
- case AMDIL::GLOBALTRUNCSTORE64_i32i16:
- case AMDIL::GLOBALTRUNCSTORE64_v2i32i16:
- case AMDIL::GLOBALTRUNCSTORE64_v4i32i16:
- case AMDIL::LOCALTRUNCSTORE64_i32i16:
- case AMDIL::LOCALTRUNCSTORE64_v2i32i16:
- case AMDIL::LOCALTRUNCSTORE64_v4i32i16:
- case AMDIL::REGIONTRUNCSTORE64_i32i16:
- case AMDIL::REGIONTRUNCSTORE64_v2i32i16:
- case AMDIL::REGIONTRUNCSTORE64_v4i32i16:
- case AMDIL::PRIVATETRUNCSTORE64_i32i16:
- case AMDIL::PRIVATETRUNCSTORE64_v2i32i16:
- case AMDIL::PRIVATETRUNCSTORE64_v4i32i16:
- case AMDIL::GLOBALTRUNCSTORE_i32i16:
- case AMDIL::GLOBALTRUNCSTORE_v2i32i16:
- case AMDIL::GLOBALTRUNCSTORE_v4i32i16:
- case AMDIL::LOCALTRUNCSTORE_i32i16:
- case AMDIL::LOCALTRUNCSTORE_v2i32i16:
- case AMDIL::LOCALTRUNCSTORE_v4i32i16:
- case AMDIL::REGIONTRUNCSTORE_i32i16:
- case AMDIL::REGIONTRUNCSTORE_v2i32i16:
- case AMDIL::REGIONTRUNCSTORE_v4i32i16:
- case AMDIL::PRIVATETRUNCSTORE_i32i16:
- case AMDIL::PRIVATETRUNCSTORE_v2i32i16:
- case AMDIL::PRIVATETRUNCSTORE_v4i32i16:
+ mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1011)
+ .addReg(dataReg);
+ dataReg = AMDIL::Rxy1011;
+ case AMDIL::GLOBALTRUNCSTORE64v2i16i8r: // case AMDIL::GLOBALTRUNCSTORE64v2i16i8i:
+ case AMDIL::LOCALTRUNCSTORE64v2i16i8r: // case AMDIL::LOCALTRUNCSTORE64v2i16i8i:
+ case AMDIL::REGIONTRUNCSTORE64v2i16i8r: // case AMDIL::REGIONTRUNCSTORE64v2i16i8i:
+ case AMDIL::PRIVATETRUNCSTORE64v2i16i8r: // case AMDIL::PRIVATETRUNCSTORE64v2i16i8i:
+ case AMDIL::GLOBALTRUNCSTOREv2i16i8r: // case AMDIL::GLOBALTRUNCSTOREv2i16i8i:
+ case AMDIL::LOCALTRUNCSTOREv2i16i8r: // case AMDIL::LOCALTRUNCSTOREv2i16i8i:
+ case AMDIL::REGIONTRUNCSTOREv2i16i8r: // case AMDIL::REGIONTRUNCSTOREv2i16i8i:
+ case AMDIL::PRIVATETRUNCSTOREv2i16i8r: // case AMDIL::PRIVATETRUNCSTOREv2i16i8i:
+ case AMDIL::GLOBALTRUNCSTORE64v2i32i8r: // case AMDIL::GLOBALTRUNCSTORE64v2i32i8i:
+ case AMDIL::LOCALTRUNCSTORE64v2i32i8r: // case AMDIL::LOCALTRUNCSTORE64v2i32i8i:
+ case AMDIL::REGIONTRUNCSTORE64v2i32i8r: // case AMDIL::REGIONTRUNCSTORE64v2i32i8i:
+ case AMDIL::PRIVATETRUNCSTORE64v2i32i8r: // case AMDIL::PRIVATETRUNCSTORE64v2i32i8i:
+ case AMDIL::GLOBALTRUNCSTOREv2i32i8r: // case AMDIL::GLOBALTRUNCSTOREv2i32i8i:
+ case AMDIL::LOCALTRUNCSTOREv2i32i8r: // case AMDIL::LOCALTRUNCSTOREv2i32i8i:
+ case AMDIL::REGIONTRUNCSTOREv2i32i8r: // case AMDIL::REGIONTRUNCSTOREv2i32i8i:
+ case AMDIL::PRIVATETRUNCSTOREv2i32i8r: // case AMDIL::PRIVATETRUNCSTOREv2i32i8i:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::BINARY_AND_v4i32), AMDIL::R1011)
- .addReg(AMDIL::R1011)
- .addImm(mMFI->addi32Literal(0xFFFF));
+ mTII->get(AMDIL::ANDv2i32rr), AMDIL::Rxy1011)
+ .addReg(dataReg)
+ .addImm(mMFI->addi32Literal(0xFF));
+ dataReg = AMDIL::Rxy1011;
break;
- case AMDIL::GLOBALTRUNCSTORE64_i64i32:
- case AMDIL::LOCALTRUNCSTORE64_i64i32:
- case AMDIL::REGIONTRUNCSTORE64_i64i32:
- case AMDIL::PRIVATETRUNCSTORE64_i64i32:
- case AMDIL::GLOBALTRUNCSTORE_i64i32:
- case AMDIL::LOCALTRUNCSTORE_i64i32:
- case AMDIL::REGIONTRUNCSTORE_i64i32:
- case AMDIL::PRIVATETRUNCSTORE_i64i32:
+ case AMDIL::GLOBALTRUNCSTORE64v4i16i8r: // case AMDIL::GLOBALTRUNCSTORE64v4i16i8i:
+ case AMDIL::LOCALTRUNCSTORE64v4i16i8r: // case AMDIL::LOCALTRUNCSTORE64v4i16i8i:
+ case AMDIL::REGIONTRUNCSTORE64v4i16i8r: // case AMDIL::REGIONTRUNCSTORE64v4i16i8i:
+ case AMDIL::PRIVATETRUNCSTORE64v4i16i8r: // case AMDIL::PRIVATETRUNCSTORE64v4i16i8i:
+ case AMDIL::GLOBALTRUNCSTOREv4i16i8r: // case AMDIL::GLOBALTRUNCSTOREv4i16i8i:
+ case AMDIL::LOCALTRUNCSTOREv4i16i8r: // case AMDIL::LOCALTRUNCSTOREv4i16i8i:
+ case AMDIL::REGIONTRUNCSTOREv4i16i8r: // case AMDIL::REGIONTRUNCSTOREv4i16i8i:
+ case AMDIL::PRIVATETRUNCSTOREv4i16i8r: // case AMDIL::PRIVATETRUNCSTOREv4i16i8i:
+ case AMDIL::GLOBALTRUNCSTORE64v4i32i8r: // case AMDIL::GLOBALTRUNCSTORE64v4i32i8i:
+ case AMDIL::LOCALTRUNCSTORE64v4i32i8r: // case AMDIL::LOCALTRUNCSTORE64v4i32i8i:
+ case AMDIL::REGIONTRUNCSTORE64v4i32i8r: // case AMDIL::REGIONTRUNCSTORE64v4i32i8i:
+ case AMDIL::PRIVATETRUNCSTORE64v4i32i8r: // case AMDIL::PRIVATETRUNCSTORE64v4i32i8i:
+ case AMDIL::GLOBALTRUNCSTOREv4i32i8r: // case AMDIL::GLOBALTRUNCSTOREv4i32i8i:
+ case AMDIL::LOCALTRUNCSTOREv4i32i8r: // case AMDIL::LOCALTRUNCSTOREv4i32i8i:
+ case AMDIL::REGIONTRUNCSTOREv4i32i8r: // case AMDIL::REGIONTRUNCSTOREv4i32i8i:
+ case AMDIL::PRIVATETRUNCSTOREv4i32i8r: // case AMDIL::PRIVATETRUNCSTOREv4i32i8i:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::LLO), AMDIL::Rx1011)
- .addReg(AMDIL::Rxy1011);
+ mTII->get(AMDIL::ANDv4i32rr), AMDIL::R1011)
+ .addReg(dataReg)
+ .addImm(mMFI->addi32Literal(0xFF));
+ dataReg = AMDIL::R1011;
break;
- case AMDIL::GLOBALTRUNCSTORE64_v2i64i32:
- case AMDIL::LOCALTRUNCSTORE64_v2i64i32:
- case AMDIL::REGIONTRUNCSTORE64_v2i64i32:
- case AMDIL::PRIVATETRUNCSTORE64_v2i64i32:
- case AMDIL::GLOBALTRUNCSTORE_v2i64i32:
- case AMDIL::LOCALTRUNCSTORE_v2i64i32:
- case AMDIL::REGIONTRUNCSTORE_v2i64i32:
- case AMDIL::PRIVATETRUNCSTORE_v2i64i32:
+ case AMDIL::GLOBALTRUNCSTORE64i64i16r: // case AMDIL::GLOBALTRUNCSTORE64i64i16i:
+ case AMDIL::LOCALTRUNCSTORE64i64i16r: // case AMDIL::LOCALTRUNCSTORE64i64i16i:
+ case AMDIL::REGIONTRUNCSTORE64i64i16r: // case AMDIL::REGIONTRUNCSTORE64i64i16i:
+ case AMDIL::PRIVATETRUNCSTORE64i64i16r: // case AMDIL::PRIVATETRUNCSTORE64i64i16i:
+ case AMDIL::GLOBALTRUNCSTOREi64i16r: // case AMDIL::GLOBALTRUNCSTOREi64i16i:
+ case AMDIL::LOCALTRUNCSTOREi64i16r: // case AMDIL::LOCALTRUNCSTOREi64i16i:
+ case AMDIL::REGIONTRUNCSTOREi64i16r: // case AMDIL::REGIONTRUNCSTOREi64i16i:
+ case AMDIL::PRIVATETRUNCSTOREi64i16r: // case AMDIL::PRIVATETRUNCSTOREi64i16i:
BuildMI(*mBB, MI, DL,
- mTII->get(AMDIL::LLO_v2i64), AMDIL::Rxy1011)
- .addReg(AMDIL::R1011);
+ mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1011)
+ .addReg(dataReg);
+ dataReg = AMDIL::Rxy1011;
+ case AMDIL::GLOBALTRUNCSTORE64i32i16r: // case AMDIL::GLOBALTRUNCSTORE64i32i16i:
+ case AMDIL::LOCALTRUNCSTORE64i32i16r: // case AMDIL::LOCALTRUNCSTORE64i32i16i:
+ case AMDIL::REGIONTRUNCSTORE64i32i16r: // case AMDIL::REGIONTRUNCSTORE64i32i16i:
+ case AMDIL::PRIVATETRUNCSTORE64i32i16r: // case AMDIL::PRIVATETRUNCSTORE64i32i16i:
+ case AMDIL::GLOBALTRUNCSTOREi32i16r: // case AMDIL::GLOBALTRUNCSTOREi32i16i:
+ case AMDIL::LOCALTRUNCSTOREi32i16r: // case AMDIL::LOCALTRUNCSTOREi32i16i:
+ case AMDIL::REGIONTRUNCSTOREi32i16r: // case AMDIL::REGIONTRUNCSTOREi32i16i:
+ case AMDIL::PRIVATETRUNCSTOREi32i16r: // case AMDIL::PRIVATETRUNCSTOREi32i16i:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::ANDi32rr), AMDIL::Rx1011)
+ .addReg(dataReg)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ dataReg = AMDIL::Rx1011;
break;
- case AMDIL::GLOBALTRUNCSTORE64_f64f32:
- case AMDIL::LOCALTRUNCSTORE64_f64f32:
- case AMDIL::REGIONTRUNCSTORE64_f64f32:
- case AMDIL::PRIVATETRUNCSTORE64_f64f32:
- case AMDIL::GLOBALTRUNCSTORE_f64f32:
- case AMDIL::LOCALTRUNCSTORE_f64f32:
- case AMDIL::REGIONTRUNCSTORE_f64f32:
- case AMDIL::PRIVATETRUNCSTORE_f64f32:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
- AMDIL::Rx1011).addReg(AMDIL::Rxy1011);
+ case AMDIL::GLOBALTRUNCSTORE64v2i64i16r: // case AMDIL::GLOBALTRUNCSTORE64v2i64i16i:
+ case AMDIL::LOCALTRUNCSTORE64v2i64i16r: // case AMDIL::LOCALTRUNCSTORE64v2i64i16i:
+ case AMDIL::REGIONTRUNCSTORE64v2i64i16r: // case AMDIL::REGIONTRUNCSTORE64v2i64i16i:
+ case AMDIL::PRIVATETRUNCSTORE64v2i64i16r: // case AMDIL::PRIVATETRUNCSTORE64v2i64i16i:
+ case AMDIL::GLOBALTRUNCSTOREv2i64i16r: // case AMDIL::GLOBALTRUNCSTOREv2i64i16i:
+ case AMDIL::LOCALTRUNCSTOREv2i64i16r: // case AMDIL::LOCALTRUNCSTOREv2i64i16i:
+ case AMDIL::REGIONTRUNCSTOREv2i64i16r: // case AMDIL::REGIONTRUNCSTOREv2i64i16i:
+ case AMDIL::PRIVATETRUNCSTOREv2i64i16r: // case AMDIL::PRIVATETRUNCSTOREv2i64i16i:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1011)
+ .addReg(dataReg);
+ dataReg = AMDIL::Rxy1011;
+ case AMDIL::GLOBALTRUNCSTORE64v2i32i16r: // case AMDIL::GLOBALTRUNCSTORE64v2i32i16i:
+ case AMDIL::LOCALTRUNCSTORE64v2i32i16r: // case AMDIL::LOCALTRUNCSTORE64v2i32i16i:
+ case AMDIL::REGIONTRUNCSTORE64v2i32i16r: // case AMDIL::REGIONTRUNCSTORE64v2i32i16i:
+ case AMDIL::PRIVATETRUNCSTORE64v2i32i16r: // case AMDIL::PRIVATETRUNCSTORE64v2i32i16i:
+ case AMDIL::GLOBALTRUNCSTOREv2i32i16r: // case AMDIL::GLOBALTRUNCSTOREv2i32i16i:
+ case AMDIL::LOCALTRUNCSTOREv2i32i16r: // case AMDIL::LOCALTRUNCSTOREv2i32i16i:
+ case AMDIL::REGIONTRUNCSTOREv2i32i16r: // case AMDIL::REGIONTRUNCSTOREv2i32i16i:
+ case AMDIL::PRIVATETRUNCSTOREv2i32i16r: // case AMDIL::PRIVATETRUNCSTOREv2i32i16i:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::ANDv2i32rr), AMDIL::Rxy1011)
+ .addReg(dataReg)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ dataReg = AMDIL::Rxy1011;
break;
- case AMDIL::GLOBALTRUNCSTORE64_v2f64f32:
- case AMDIL::LOCALTRUNCSTORE64_v2f64f32:
- case AMDIL::REGIONTRUNCSTORE64_v2f64f32:
- case AMDIL::PRIVATETRUNCSTORE64_v2f64f32:
- case AMDIL::GLOBALTRUNCSTORE_v2f64f32:
- case AMDIL::LOCALTRUNCSTORE_v2f64f32:
- case AMDIL::REGIONTRUNCSTORE_v2f64f32:
- case AMDIL::PRIVATETRUNCSTORE_v2f64f32:
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
- AMDIL::Rx1011).addReg(AMDIL::Rxy1011);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOF),
- AMDIL::Ry1011).addReg(AMDIL::Rzw1011);
+ case AMDIL::GLOBALTRUNCSTORE64v4i32i16r: // case AMDIL::GLOBALTRUNCSTORE64v4i32i16i:
+ case AMDIL::LOCALTRUNCSTORE64v4i32i16r: // case AMDIL::LOCALTRUNCSTORE64v4i32i16i:
+ case AMDIL::REGIONTRUNCSTORE64v4i32i16r: // case AMDIL::REGIONTRUNCSTORE64v4i32i16i:
+ case AMDIL::PRIVATETRUNCSTORE64v4i32i16r: // case AMDIL::PRIVATETRUNCSTORE64v4i32i16i:
+ case AMDIL::GLOBALTRUNCSTOREv4i32i16r: // case AMDIL::GLOBALTRUNCSTOREv4i32i16i:
+ case AMDIL::LOCALTRUNCSTOREv4i32i16r: // case AMDIL::LOCALTRUNCSTOREv4i32i16i:
+ case AMDIL::REGIONTRUNCSTOREv4i32i16r: // case AMDIL::REGIONTRUNCSTOREv4i32i16i:
+ case AMDIL::PRIVATETRUNCSTOREv4i32i16r: // case AMDIL::PRIVATETRUNCSTOREv4i32i16i:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::ANDv4i32rr), AMDIL::R1011)
+ .addReg(dataReg)
+ .addImm(mMFI->addi32Literal(0xFFFF));
+ dataReg = AMDIL::R1011;
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64i64i32r: // case AMDIL::GLOBALTRUNCSTORE64i64i32i:
+ case AMDIL::LOCALTRUNCSTORE64i64i32r: // case AMDIL::LOCALTRUNCSTORE64i64i32i:
+ case AMDIL::REGIONTRUNCSTORE64i64i32r: // case AMDIL::REGIONTRUNCSTORE64i64i32i:
+ case AMDIL::PRIVATETRUNCSTORE64i64i32r: // case AMDIL::PRIVATETRUNCSTORE64i64i32i:
+ case AMDIL::GLOBALTRUNCSTOREi64i32r: // case AMDIL::GLOBALTRUNCSTOREi64i32i:
+ case AMDIL::LOCALTRUNCSTOREi64i32r: // case AMDIL::LOCALTRUNCSTOREi64i32i:
+ case AMDIL::REGIONTRUNCSTOREi64i32r: // case AMDIL::REGIONTRUNCSTOREi64i32i:
+ case AMDIL::PRIVATETRUNCSTOREi64i32r: // case AMDIL::PRIVATETRUNCSTOREi64i32i:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLOi64r), AMDIL::Rx1011)
+ .addReg(dataReg);
+ dataReg = AMDIL::Rx1011;
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64v2i64i32r: // case AMDIL::GLOBALTRUNCSTORE64v2i64i32i:
+ case AMDIL::LOCALTRUNCSTORE64v2i64i32r: // case AMDIL::LOCALTRUNCSTORE64v2i64i32i:
+ case AMDIL::REGIONTRUNCSTORE64v2i64i32r: // case AMDIL::REGIONTRUNCSTORE64v2i64i32i:
+ case AMDIL::PRIVATETRUNCSTORE64v2i64i32r: // case AMDIL::PRIVATETRUNCSTORE64v2i64i32i:
+ case AMDIL::GLOBALTRUNCSTOREv2i64i32r: // case AMDIL::GLOBALTRUNCSTOREv2i64i32i:
+ case AMDIL::LOCALTRUNCSTOREv2i64i32r: // case AMDIL::LOCALTRUNCSTOREv2i64i32i:
+ case AMDIL::REGIONTRUNCSTOREv2i64i32r: // case AMDIL::REGIONTRUNCSTOREv2i64i32i:
+ case AMDIL::PRIVATETRUNCSTOREv2i64i32r: // case AMDIL::PRIVATETRUNCSTOREv2i64i32i:
+ BuildMI(*mBB, MI, DL,
+ mTII->get(AMDIL::LLOv2i64r), AMDIL::Rxy1011)
+ .addReg(dataReg);
+ dataReg = AMDIL::Rxy1011;
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64f64f32r: // case AMDIL::GLOBALTRUNCSTORE64f64f32i:
+ case AMDIL::LOCALTRUNCSTORE64f64f32r: // case AMDIL::LOCALTRUNCSTORE64f64f32i:
+ case AMDIL::REGIONTRUNCSTORE64f64f32r: // case AMDIL::REGIONTRUNCSTORE64f64f32i:
+ case AMDIL::PRIVATETRUNCSTORE64f64f32r: // case AMDIL::PRIVATETRUNCSTORE64f64f32i:
+ case AMDIL::GLOBALTRUNCSTOREf64f32r: // case AMDIL::GLOBALTRUNCSTOREf64f32i:
+ case AMDIL::LOCALTRUNCSTOREf64f32r: // case AMDIL::LOCALTRUNCSTOREf64f32i:
+ case AMDIL::REGIONTRUNCSTOREf64f32r: // case AMDIL::REGIONTRUNCSTOREf64f32i:
+ case AMDIL::PRIVATETRUNCSTOREf64f32r: // case AMDIL::PRIVATETRUNCSTOREf64f32i:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOFr),
+ AMDIL::Rx1011).addReg(dataReg);
+ dataReg = AMDIL::Rx1011;
+ break;
+ case AMDIL::GLOBALTRUNCSTORE64v2f64f32r: // case AMDIL::GLOBALTRUNCSTORE64v2f64f32i:
+ case AMDIL::LOCALTRUNCSTORE64v2f64f32r: // case AMDIL::LOCALTRUNCSTORE64v2f64f32i:
+ case AMDIL::REGIONTRUNCSTORE64v2f64f32r: // case AMDIL::REGIONTRUNCSTORE64v2f64f32i:
+ case AMDIL::PRIVATETRUNCSTORE64v2f64f32r: // case AMDIL::PRIVATETRUNCSTORE64v2f64f32i:
+ case AMDIL::GLOBALTRUNCSTOREv2f64f32r: // case AMDIL::GLOBALTRUNCSTOREv2f64f32i:
+ case AMDIL::LOCALTRUNCSTOREv2f64f32r: // case AMDIL::LOCALTRUNCSTOREv2f64f32i:
+ case AMDIL::REGIONTRUNCSTOREv2f64f32r: // case AMDIL::REGIONTRUNCSTOREv2f64f32i:
+ case AMDIL::PRIVATETRUNCSTOREv2f64f32r: // case AMDIL::PRIVATETRUNCSTOREv2f64f32i:
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOFr),
+ AMDIL::Rx1011).addReg(getCompReg(dataReg, sub_xy_comp));
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::DTOFr),
+ AMDIL::Ry1011).addReg(getCompReg(dataReg, sub_zw_comp));
+ dataReg = AMDIL::Rxy1011;
break;
}
}
+uint32_t
+AMDILIOExpansionImpl::getPackedReg(uint32_t &dataReg, uint32_t id)
+{
+ switch (id) {
+ default:
+ return dataReg;
+ case UNPACK_V2I8:
+ case UNPACK_V2I16:
+ case UNPACK_V4I8:
+ return getCompReg(dataReg, sub_x_comp, sub_z_comp);
+ case UNPACK_V4I16:
+ return getCompReg(dataReg, sub_xy_comp, sub_zw_comp);
+ }
+}
void
-AMDILIOExpansion::expandAddressCalc(MachineInstr *MI)
+AMDILIOExpansionImpl::expandAddressCalc(MachineInstr *MI, uint32_t &addyReg)
{
if (!isAddrCalcInstr(MI)) {
return;
}
DebugLoc DL = MI->getDebugLoc();
- bool is64bit = is64bitLSOp(TM, MI);
- uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
- uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
- switch(MI->getOpcode()) {
- ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE)
- ExpandCaseToAllTruncTypes(AMDIL::PRIVATETRUNCSTORE64)
- ExpandCaseToAllTypes(AMDIL::PRIVATESTORE)
- ExpandCaseToAllTypes(AMDIL::PRIVATELOAD)
- ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::PRIVATESTORE64)
- ExpandCaseToAllTypes(AMDIL::PRIVATELOAD64)
- ExpandCaseToAllTypes(AMDIL::PRIVATESEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::PRIVATEZEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::PRIVATEAEXTLOAD64)
+ bool is64bit = is64bitLSOp(MI);
+ uint32_t newReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+ uint32_t addInst = (is64bit) ? AMDIL::ADDi64rr : AMDIL::ADDi32rr;
+ if (isPrivateInst(MI) && (isPtrLoadInst(MI)
+ || (isPtrStoreInst(MI)
+ && mSTM->device()->usesSoftware(AMDILDeviceInfo
+ ::PrivateMem))))
+ {
BuildMI(*mBB, MI, DL, mTII->get(addInst),
- addyReg).addReg(addyReg).addReg(AMDIL::T1);
- break;
- ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE)
- ExpandCaseToAllTypes(AMDIL::LOCALLOAD)
- ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::LOCALSTORE)
- ExpandCaseToAllTruncTypes(AMDIL::LOCALTRUNCSTORE64)
- ExpandCaseToAllTypes(AMDIL::LOCALLOAD64)
- ExpandCaseToAllTypes(AMDIL::LOCALSEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::LOCALZEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::LOCALAEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::LOCALSTORE64)
+ newReg).addReg(addyReg).addReg(AMDIL::T1);
+ addyReg = newReg;
+ } else if (isLocalInst(MI) && (isPtrStoreInst(MI) || isPtrLoadInst(MI))) {
BuildMI(*mBB, MI, DL, mTII->get(addInst),
- addyReg).addReg(addyReg).addReg(AMDIL::T2);
- break;
- ExpandCaseToAllTypes(AMDIL::CPOOLLOAD)
- ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD)
- ExpandCaseToAllTypes(AMDIL::CPOOLLOAD64)
- ExpandCaseToAllTypes(AMDIL::CPOOLSEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::CPOOLZEXTLOAD64)
- ExpandCaseToAllTypes(AMDIL::CPOOLAEXTLOAD64)
+ newReg).addReg(addyReg).addReg(AMDIL::T2);
+ addyReg = newReg;
+ } else if (isConstantPoolInst(MI) && isPtrLoadInst(MI) &&
+ MI->getOperand(1).isReg()) {
BuildMI(*mBB, MI, DL, mTII->get(addInst),
- addyReg).addReg(addyReg).addReg(AMDIL::SDP);
- break;
- default:
- return;
+ newReg).addReg(addyReg).addReg(AMDIL::SDP);
+ addyReg = newReg;
}
}
void
-AMDILIOExpansion::expandLoadStartCode(MachineInstr *MI)
+AMDILIOExpansionImpl::expandLoadStartCode(MachineInstr *MI, uint32_t &addyReg)
{
DebugLoc DL = MI->getDebugLoc();
- bool is64bit = is64bitLSOp(TM, MI);
- uint32_t addyReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
- uint32_t addInst = (is64bit) ? AMDIL::LADD_i64 : AMDIL::ADD_i32;
- uint32_t moveInst = (is64bit) ? AMDIL::MOVE_i64 : AMDIL::MOVE_i32;
+ bool is64bit = is64bitLSOp(MI);
if (MI->getOperand(2).isReg()) {
+ uint32_t newReg = (is64bit) ? AMDIL::Rxy1010 : AMDIL::Rx1010;
+ uint32_t addInst = (is64bit) ? AMDIL::ADDi64rr : AMDIL::ADDi32rr;
BuildMI(*mBB, MI, DL, mTII->get(addInst),
- addyReg).addReg(MI->getOperand(1).getReg())
+ newReg).addReg(addyReg)
.addReg(MI->getOperand(2).getReg());
- } else {
- BuildMI(*mBB, MI, DL, mTII->get(moveInst),
- addyReg).addReg(MI->getOperand(1).getReg());
+ addyReg = newReg;
}
- MI->getOperand(1).setReg(addyReg);
- expandAddressCalc(MI);
+ expandAddressCalc(MI, addyReg);
}
void
-AMDILIOExpansion::emitStaticCPLoad(MachineInstr* MI, int swizzle,
- int id, bool ExtFPLoad)
+AMDILIOExpansionImpl::emitStaticCPLoad(MachineInstr* MI,
+ int swizzle,
+ int id,
+ bool ExtFPLoad,
+ uint32_t &dataReg)
{
DebugLoc DL = MI->getDebugLoc();
switch(swizzle) {
default:
BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad
- ? AMDIL::DTOF : AMDIL::MOVE_i32),
- MI->getOperand(0).getReg())
+ ? AMDIL::DTOFr : AMDIL::COPY),
+ dataReg)
.addImm(id);
break;
case 1:
case 2:
case 3:
BuildMI(*mBB, MI, DL, mTII->get(ExtFPLoad
- ? AMDIL::DTOF : AMDIL::MOVE_i32), AMDIL::Rx1001)
+ ? AMDIL::DTOFr : AMDIL::COPY),
+ AMDIL::Rx1001)
.addImm(id);
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERT_v4i32),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(0).getReg())
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::VINSERTv4i32rr),
+ dataReg)
+ .addReg(dataReg)
.addReg(AMDIL::Rx1001)
.addImm(swizzle + 1);
break;
};
}
void
-AMDILIOExpansion::emitCPInst(MachineInstr* MI,
- const Constant* C, AMDILKernelManager* KM, int swizzle, bool ExtFPLoad)
+AMDILIOExpansionImpl::emitCPInst(MachineInstr* MI,
+ const Constant* C,
+ AMDILKernelManager* KM,
+ int swizzle,
+ bool ExtFPLoad,
+ uint32_t &dataReg)
{
if (const ConstantFP* CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isFloatTy()) {
@@ -1252,7 +911,7 @@
}
id = mMFI->addi64Literal(conv.ul);
}
- emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+ emitStaticCPLoad(MI, swizzle, id, ExtFPLoad, dataReg);
} else {
const APFloat &APF = CFP->getValueAPF();
union ftol_union {
@@ -1266,11 +925,11 @@
} else {
conv.d = APF.convertToDouble();
}
- uint32_t id = mMFI->getLongLits(conv.ul);
+ uint32_t id = mMFI->getLitIdx(conv.ul);
if (!id) {
- id = mMFI->getIntLits((uint32_t)conv.ul);
+ id = mMFI->getLitIdx((uint32_t)conv.ul);
}
- emitStaticCPLoad(MI, swizzle, id, ExtFPLoad);
+ emitStaticCPLoad(MI, swizzle, id, ExtFPLoad, dataReg);
}
} else if (const ConstantInt* CI = dyn_cast<ConstantInt>(C)) {
int64_t val = 0;
@@ -1278,9 +937,11 @@
val = CI->getSExtValue();
}
if (CI->getBitWidth() == 64) {
- emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(val), ExtFPLoad);
+ emitStaticCPLoad(MI, swizzle, mMFI->addi64Literal(
+ val), ExtFPLoad, dataReg);
} else {
- emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(val), ExtFPLoad);
+ emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(
+ val), ExtFPLoad, dataReg);
}
} else if (const ConstantArray* CA = dyn_cast<ConstantArray>(C)) {
uint32_t size = CA->getNumOperands();
@@ -1289,12 +950,12 @@
size = 4;
}
for (uint32_t x = 0; x < size; ++x) {
- emitCPInst(MI, CA->getOperand(0), KM, x, ExtFPLoad);
+ emitCPInst(MI, CA->getOperand(0), KM, x, ExtFPLoad, dataReg);
}
} else if (const ConstantAggregateZero* CAZ
- = dyn_cast<ConstantAggregateZero>(C)) {
+ = dyn_cast<ConstantAggregateZero>(C)) {
if (CAZ->isNullValue()) {
- emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad);
+ emitStaticCPLoad(MI, swizzle, mMFI->addi32Literal(0), ExtFPLoad, dataReg);
}
} else if (const ConstantStruct* CS = dyn_cast<ConstantStruct>(C)) {
uint32_t size = CS->getNumOperands();
@@ -1303,7 +964,7 @@
size = 4;
}
for (uint32_t x = 0; x < size; ++x) {
- emitCPInst(MI, CS->getOperand(0), KM, x, ExtFPLoad);
+ emitCPInst(MI, CS->getOperand(0), KM, x, ExtFPLoad, dataReg);
}
} else if (const ConstantVector* CV = dyn_cast<ConstantVector>(C)) {
// TODO: Make this handle vectors natively up to the correct
@@ -1314,7 +975,7 @@
size = 4;
}
for (uint32_t x = 0; x < size; ++x) {
- emitCPInst(MI, CV->getOperand(0), KM, x, ExtFPLoad);
+ emitCPInst(MI, CV->getOperand(0), KM, x, ExtFPLoad, dataReg);
}
} else if (const ConstantDataVector* CV = dyn_cast<ConstantDataVector>(C)) {
// TODO: Make this handle vectors natively up to the correct
@@ -1325,7 +986,7 @@
size = 4;
}
for (uint32_t x = 0; x < size; ++x) {
- emitCPInst(MI, CV->getElementAsConstant(0), KM, x, ExtFPLoad);
+ emitCPInst(MI, CV->getElementAsConstant(0), KM, x, ExtFPLoad, dataReg);
}
} else {
// TODO: Do we really need to handle ConstantPointerNull?
@@ -1334,4 +995,19 @@
assert(0 && "Found a constant type that I don't know how to handle");
}
}
-
+uint32_t
+AMDILIOExpansionImpl::getCompReg(uint32_t reg,
+ uint32_t subIdx0,
+ uint32_t subIdx1)
+{
+ uint32_t subreg = mTRI->getSubReg(reg, subIdx0);
+ if (!subreg) {
+ subreg = mTRI->getSubReg(reg, subIdx1);
+ }
+ assert(subreg
+ && "Found a case where the register does not have either sub-index!");
+ // Just in case we hit this assert, let's at least use a valid register so
+ // we don't have possible crashes in release mode.
+ if (!subreg) subreg = reg;
+ return subreg;
+}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIOExpansion.h Wed Sep 12 12:43:34 2012
@@ -34,8 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
-namespace llvm
-{
+namespace llvm {
class MachineFunction;
class AMDILKernelManager;
class AMDILMachineFunctionInfo;
@@ -43,6 +42,7 @@
class MachineInstr;
class Constant;
class TargetInstrInfo;
+class TargetRegisterInfo;
typedef enum {
NO_PACKING = 0,
PACK_V2I8,
@@ -55,15 +55,14 @@
UNPACK_V4I16,
UNPACK_LAST
} REG_PACKED_TYPE;
-class AMDILIOExpansion : public MachineFunctionPass
+class AMDILIOExpansionImpl
{
public:
- virtual ~AMDILIOExpansion();
- virtual const char* getPassName() const;
- bool runOnMachineFunction(MachineFunction &MF);
- static char ID;
+ virtual ~AMDILIOExpansionImpl() {
+ };
+ bool run();
protected:
- AMDILIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+ AMDILIOExpansionImpl(MachineFunction& mf);
//
// @param MI Machine instruction to check.
// @brief checks to see if the machine instruction
@@ -105,8 +104,8 @@
isHardwareRegion(MachineInstr *MI);
bool
isHardwareLocal(MachineInstr *MI);
- bool
- isPackedData(MachineInstr *MI);
+ uint32_t
+ getPackedReg(uint32_t &, uint32_t);
bool
isStaticCPLoad(MachineInstr *MI);
bool
@@ -121,58 +120,73 @@
getShiftSize(MachineInstr *MI);
uint32_t
getPointerID(MachineInstr *MI);
- uint32_t
- getDataReg(MachineInstr *MI);
void
- expandTruncData(MachineInstr *MI);
+ expandTruncData(MachineInstr *MI, uint32_t &dataReg);
void
- expandLoadStartCode(MachineInstr *MI);
+ expandLoadStartCode(MachineInstr *MI, uint32_t &addyReg);
virtual void
- expandStoreSetupCode(MachineInstr *MI) = 0;
+ expandStoreSetupCode(MachineInstr *MI, uint32_t &addyReg,
+ uint32_t &dataReg) = 0;
+ void
+ expandAddressCalc(MachineInstr *MI, uint32_t &addyReg);
void
- expandAddressCalc(MachineInstr *MI);
- unsigned
expandLongExtend(MachineInstr *MI,
- uint32_t numComponents, uint32_t size, bool signedShift);
- unsigned
+ uint32_t numComponents,
+ uint32_t size,
+ bool signedShift,
+ uint32_t &dataReg);
+ void
expandLongExtendSub32(MachineInstr *MI,
- unsigned SHLop, unsigned SHRop, unsigned USHRop,
- unsigned SHLimm, uint64_t SHRimm, unsigned USHRimm,
- unsigned LCRop, bool signedShift, bool vec2);
- unsigned
+ unsigned SHLop,
+ unsigned SHRop,
+ unsigned USHRop,
+ unsigned SHLimm,
+ uint64_t SHRimm,
+ unsigned USHRimm,
+ unsigned LCRop,
+ bool signedShift,
+ bool vec2,
+ uint32_t &dataReg);
+ void
expandIntegerExtend(MachineInstr *MI, unsigned,
unsigned, unsigned, unsigned);
- unsigned
- expandExtendLoad(MachineInstr *MI);
+ void
+ expandExtendLoad(MachineInstr *MI, uint32_t &dataReg);
virtual void
- expandPackedData(MachineInstr *MI) = 0;
+ expandPackedData(MachineInstr *MI, uint32_t &dataReg) = 0;
void
emitCPInst(MachineInstr* MI, const Constant* C,
- AMDILKernelManager* KM, int swizzle, bool ExtFPLoad);
+ AMDILKernelManager* KM, int swizzle, bool ExtFPLoad,
+ uint32_t &dataReg);
bool mDebug;
+ MachineFunction& MF;
+ MachineBasicBlock *mBB;
+ const TargetMachine &TM;
const AMDILSubtarget *mSTM;
AMDILKernelManager *mKM;
- MachineBasicBlock *mBB;
AMDILMachineFunctionInfo *mMFI;
+ const TargetRegisterInfo *mTRI;
const TargetInstrInfo *mTII;
bool saveInst;
protected:
void
emitStaticCPLoad(MachineInstr* MI, int swizzle, int id,
- bool ExtFPLoad);
- TargetMachine &TM;
-}; // class AMDILIOExpansion
+ bool ExtFPLoad, uint32_t &dataReg);
+ uint32_t getCompReg(uint32_t reg,
+ uint32_t subIdx0 = 0, uint32_t subIdx1 = 0);
+}; // class AMDILIOExpansionImpl
// Intermediate class that holds I/O code expansion that is common to the
// 7XX, Evergreen and Northern Island family of chips.
-class AMDIL789IOExpansion : public AMDILIOExpansion
-{
+class AMDIL789IOExpansionImpl : public AMDILIOExpansionImpl {
public:
- virtual ~AMDIL789IOExpansion();
- virtual const char* getPassName() const;
+ virtual ~AMDIL789IOExpansionImpl() {
+ };
protected:
- AMDIL789IOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+ AMDIL789IOExpansionImpl(MachineFunction& mf)
+ : AMDILIOExpansionImpl(mf) {
+ };
virtual void
expandGlobalStore(MachineInstr *MI) = 0;
virtual void
@@ -190,29 +204,33 @@
virtual void
expandConstantLoad(MachineInstr *MI);
virtual void
- expandPrivateLoad(MachineInstr *MI) ;
+ expandPrivateLoad(MachineInstr *MI);
virtual void
expandConstantPoolLoad(MachineInstr *MI);
void
- expandStoreSetupCode(MachineInstr *MI);
+ expandStoreSetupCode(MachineInstr *MI, uint32_t &addyReg, uint32_t &dataReg);
virtual void
- expandPackedData(MachineInstr *MI);
+ expandPackedData(MachineInstr *MI, uint32_t &dataReg);
private:
void emitVectorAddressCalc(MachineInstr *MI, bool is32bit,
- bool needsSelect);
- void emitVectorSwitchWrite(MachineInstr *MI, bool is32bit);
+ bool needsSelect, uint32_t &addy);
+ void emitVectorSwitchWrite(MachineInstr *MI,
+ bool is32bit,
+ uint32_t &addy,
+ uint32_t &data);
void emitComponentExtract(MachineInstr *MI, unsigned src,
unsigned dst, bool beforeInst);
- void emitDataLoadSelect(MachineInstr *MI);
-}; // class AMDIL789IOExpansion
-// Class that handles I/O emission for the 7XX family of devices.
-class AMDIL7XXIOExpansion : public AMDIL789IOExpansion
-{
+ void emitDataLoadSelect(MachineInstr *MI, uint32_t &data, uint32_t &addy);
+}; // class AMDIL789IOExpansionImpl
+ // Class that handles I/O emission for the 7XX family of devices.
+class AMDIL7XXIOExpansionImpl : public AMDIL789IOExpansionImpl {
public:
- AMDIL7XXIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+ AMDIL7XXIOExpansionImpl(MachineFunction& mf)
+ : AMDIL789IOExpansionImpl(mf) {
+ };
- ~AMDIL7XXIOExpansion();
- const char* getPassName() const;
+ ~AMDIL7XXIOExpansionImpl() {
+ };
protected:
void
expandGlobalStore(MachineInstr *MI);
@@ -226,16 +244,18 @@
expandRegionLoad(MachineInstr *MI);
void
expandLocalLoad(MachineInstr *MI);
-}; // class AMDIL7XXIOExpansion
+}; // class AMDIL7XXIOExpansionImpl
// Class that handles image functions to expand them into the
// correct set of I/O instructions.
-class AMDILImageExpansion : public AMDIL789IOExpansion
-{
+class AMDILImageExpansionImpl : public AMDIL789IOExpansionImpl {
public:
- AMDILImageExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+ AMDILImageExpansionImpl(MachineFunction& mf)
+ : AMDIL789IOExpansionImpl(mf) {
+ };
- virtual ~AMDILImageExpansion();
+ virtual ~AMDILImageExpansionImpl() {
+ };
protected:
//
// @param MI Instruction iterator that has the sample instruction
@@ -281,27 +301,23 @@
//
virtual void
expandInefficientImageLoad(MachineBasicBlock *BB, MachineInstr *MI);
-private:
- AMDILImageExpansion(); // Do not implement.
-
-}; // class AMDILImageExpansion
+}; // class AMDILImageExpansion
// Class that expands IO instructions for Evergreen and Northern
// Island family of devices.
-class AMDILEGIOExpansion : public AMDILImageExpansion
-{
+class AMDILEGIOExpansionImpl : public AMDILImageExpansionImpl {
public:
- AMDILEGIOExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel);
+ AMDILEGIOExpansionImpl(MachineFunction& mf)
+ : AMDILImageExpansionImpl(mf) {
+ };
- virtual ~AMDILEGIOExpansion();
- const char* getPassName() const;
+ virtual ~AMDILEGIOExpansionImpl() {
+ };
protected:
virtual bool
isIOInstruction(MachineInstr *MI);
virtual void
expandIOInstruction(MachineInstr *MI);
- bool
- isImageIO(MachineInstr *MI);
virtual void
expandGlobalStore(MachineInstr *MI);
void
@@ -317,14 +333,30 @@
virtual bool
isCacheableOp(MachineInstr *MI);
void
- expandStoreSetupCode(MachineInstr *MI);
- void
- expandPackedData(MachineInstr *MI);
+ expandPackedData(MachineInstr *MI, uint32_t &dataReg);
private:
bool
isArenaOp(MachineInstr *MI);
void
- expandArenaSetup(MachineInstr *MI);
-}; // class AMDILEGIOExpansion
+ expandArenaSetup(MachineInstr *MI, uint32_t &addy);
+}; // class AMDILEGIOExpansionImpl
+
+class AMDIL7XXIOExpansion : public MachineFunctionPass {
+public:
+ static char ID;
+public:
+ AMDIL7XXIOExpansion();
+ virtual const char* getPassName() const;
+ bool runOnMachineFunction(MachineFunction &MF);
+};
+
+class AMDILEGIOExpansion : public MachineFunctionPass {
+public:
+ static char ID;
+public:
+ AMDILEGIOExpansion();
+ virtual const char* getPassName() const;
+ bool runOnMachineFunction(MachineFunction &MF);
+};
} // namespace llvm
#endif // _AMDILIOEXPANSION_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp Wed Sep 12 12:43:34 2012
@@ -28,18 +28,18 @@
// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
// //for SelectionDAG operations.
//
-namespace
-{
-class AMDILDAGToDAGISel : public SelectionDAGISel
-{
- // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const AMDILSubtarget *Subtarget;
+namespace {
+class AMDILDAGToDAGISel : public SelectionDAGISel {
+// Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+// make the right decision when generating code for different targets.
+const AMDILSubtarget *Subtarget;
public:
explicit AMDILDAGToDAGISel(AMDILTargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel),
- Subtarget(&TM.getSubtarget<AMDILSubtarget>()) {}
- virtual ~AMDILDAGToDAGISel() {};
+ Subtarget(&TM.getSubtarget<AMDILSubtarget>()) {
+ }
+ virtual ~AMDILDAGToDAGISel() {
+ };
inline SDValue getSmallIPtrImm(unsigned Imm);
SDNode *Select(SDNode *N);
@@ -76,19 +76,14 @@
// DAG, ready for instruction scheduling.
//
FunctionPass *llvm::createAMDILISelDag(AMDILTargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel)
-{
+ llvm::CodeGenOpt::Level OptLevel) {
return new AMDILDAGToDAGISel(TM, OptLevel);
}
-
-SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm)
-{
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
-
bool AMDILDAGToDAGISel::SelectADDR(
- SDValue Addr, SDValue& R1, SDValue& R2)
-{
+ SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
@@ -111,11 +106,8 @@
}
return true;
}
-
-
bool AMDILDAGToDAGISel::SelectADDR64(
- SDValue Addr, SDValue& R1, SDValue& R2)
-{
+ SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
return false;
@@ -123,7 +115,7 @@
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R1 = Addr;
R2 = CurDAG->getTargetConstant(0, MVT::i64);
} else {
R1 = Addr;
@@ -138,26 +130,23 @@
}
return true;
}
-
-SDNode *AMDILDAGToDAGISel::Select(SDNode *N)
-{
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
return NULL; // Already selected.
}
- switch (Opc) {
- default:
- break;
- case ISD::FrameIndex: {
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
- unsigned int FI = FIN->getIndex();
- EVT OpVT = N->getValueType(0);
- unsigned int NewOpc = AMDIL::MOVE_i32;
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
- return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
- }
- }
- break;
+ if (Opc == ISD::FrameIndex
+ && dyn_cast<FrameIndexSDNode>(N)) {
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
+ FI->getValueType(0));
+ return CurDAG->SelectNodeTo(N, AMDIL::LOADFIi32, FI->getValueType(0), TFI);
+ /*
+ *
+ return CurDAG->getNode(ISD::ADD, N->getDebugLoc(), FI->getValueType(0),
+ CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)),
+ CurDAG->getConstant(0, FI->getValueType(0)));
+ */
}
// For all atomic instructions, we need to add a constant
// operand that stores the resource ID in the instruction
@@ -166,50 +155,37 @@
}
return SelectCode(N);
}
-
bool AMDILDAGToDAGISel::isFlatASOverrideEnabled() const
{
return Subtarget->overridesFlatAS();
}
-
-bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
&& !isFlatASOverrideEnabled();
}
-
-bool AMDILDAGToDAGISel::isFlatStore(const StoreSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isFlatStore(const StoreSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::FLAT_ADDRESS)
|| (isFlatASOverrideEnabled()
&& (check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
|| check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
|| check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
|| check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS))
- );
+ );
}
-
-bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) const {
return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
&& !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
&& !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS))
&& !isFlatASOverrideEnabled();
}
-
-bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
&& !isFlatASOverrideEnabled();
}
-
-bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
}
-
-bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const
-{
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const {
if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
&& !isFlatASOverrideEnabled()) {
return true;
@@ -228,38 +204,28 @@
return false;
}
}
-
-bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
&& !isFlatASOverrideEnabled();
}
-
-bool AMDILDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::FLAT_ADDRESS)
|| (isFlatASOverrideEnabled()
&& (check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
|| check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
|| check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
|| check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS))
- );
+ );
}
-
-bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
&& !isFlatASOverrideEnabled();
}
-
-bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
&& !isFlatASOverrideEnabled();
}
-
-bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
MachineMemOperand *MMO = N->getMemOperand();
if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
&& !isFlatASOverrideEnabled()) {
@@ -273,9 +239,7 @@
}
return false;
}
-
-bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const
-{
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)
&& !isFlatASOverrideEnabled()) {
// Check to make sure we are not a constant pool load or a constant load
@@ -288,17 +252,15 @@
&& !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
&& !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
&& !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
- && !isFlatASOverrideEnabled()) {
+ && !isFlatASOverrideEnabled())
+ {
return true;
}
return false;
}
-
-const char *AMDILDAGToDAGISel::getPassName() const
-{
+const char *AMDILDAGToDAGISel::getPassName() const {
return "AMDIL DAG->DAG Pattern Instruction Selection";
}
-
SDNode*
AMDILDAGToDAGISel::xformAtomicInst(SDNode *N)
{
@@ -306,8 +268,7 @@
bool addOne = false;
unsigned opc = N->getOpcode();
switch (opc) {
- default:
- return N;
+ default: return N;
case AMDILISD::ATOM_G_ADD:
case AMDILISD::ATOM_G_AND:
case AMDILISD::ATOM_G_MAX:
@@ -319,6 +280,8 @@
case AMDILISD::ATOM_G_RSUB:
case AMDILISD::ATOM_G_XCHG:
case AMDILISD::ATOM_G_XOR:
+ case AMDILISD::ATOM_G_LOAD:
+ case AMDILISD::ATOM_G_STORE:
case AMDILISD::ATOM_G_ADD_NORET:
case AMDILISD::ATOM_G_AND_NORET:
case AMDILISD::ATOM_G_MAX_NORET:
@@ -382,100 +345,19 @@
case AMDILISD::ATOM_R_CMPXCHG_NORET:
break;
case AMDILISD::ATOM_G_DEC:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_G_SUB;
- }
- break;
case AMDILISD::ATOM_G_INC:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_G_ADD;
- }
- break;
case AMDILISD::ATOM_G_DEC_NORET:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_G_SUB_NORET;
- }
- break;
case AMDILISD::ATOM_G_INC_NORET:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_G_ADD_NORET;
- }
- break;
case AMDILISD::ATOM_L_DEC:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_L_SUB;
- }
- break;
case AMDILISD::ATOM_L_INC:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_L_ADD;
- }
- break;
case AMDILISD::ATOM_L_DEC_NORET:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_L_SUB_NORET;
- }
- break;
case AMDILISD::ATOM_L_INC_NORET:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_L_ADD_NORET;
- }
- break;
case AMDILISD::ATOM_R_DEC:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_R_SUB;
- }
- break;
case AMDILISD::ATOM_R_INC:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_R_ADD;
- }
- break;
case AMDILISD::ATOM_R_DEC_NORET:
- addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_R_SUB;
- }
- break;
case AMDILISD::ATOM_R_INC_NORET:
addOne = true;
- if (Subtarget->calVersion() >= CAL_VERSION_SC_136) {
- addVal = (uint32_t)-1;
- } else {
- opc = AMDILISD::ATOM_R_ADD_NORET;
- }
+ addVal = (uint32_t)-1;
break;
}
// The largest we can have is a cmpxchg w/ a return value and an output chain.
@@ -488,17 +370,18 @@
Ops[x] = N->getOperand(x);
}
if (addOne) {
- Ops[x++] = SDValue(SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode()), 0);
+ Ops[x++] = SDValue(SelectCode(CurDAG->getConstant(addVal, MVT::i32).getNode(
+ )), 0);
}
Ops[x++] = CurDAG->getTargetConstant(0, MVT::i32);
SDVTList Tys = N->getVTList();
MemSDNode *MemNode = dyn_cast<MemSDNode>(N);
assert(MemNode && "Atomic should be of MemSDNode type!");
N = CurDAG->getMemIntrinsicNode(opc, N->getDebugLoc(), Tys, Ops, x,
- MemNode->getMemoryVT(), MemNode->getMemOperand()).getNode();
+ MemNode->getMemoryVT(),
+ MemNode->getMemOperand()).getNode();
return N;
}
-
#ifdef DEBUGTMP
#undef INT64_C
#endif
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.cpp Wed Sep 12 12:43:34 2012
@@ -47,586 +47,6 @@
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
-static SDValue
-getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
-{
- DebugLoc DL = Src.getDebugLoc();
- EVT svt = Src.getValueType().getScalarType();
- EVT dvt = Dst.getValueType().getScalarType();
- if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
- if (dvt.bitsGT(svt)) {
- Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
- } else if (svt.bitsLT(svt)) {
- Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
- DAG.getConstant(1, MVT::i32));
- }
- } else if (svt.isInteger() && dvt.isInteger()) {
- if (!svt.bitsEq(dvt)) {
- Src = DAG.getSExtOrTrunc(Src, DL, dvt);
- } else {
- Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
- }
- } else if (svt.isInteger()) {
- unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
- if (!svt.bitsEq(dvt)) {
- if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
- Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
- } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
- Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
- } else {
- assert(0 && "We only support 32 and 64bit fp types");
- }
- }
- Src = DAG.getNode(opcode, DL, dvt, Src);
- } else if (dvt.isInteger()) {
- unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
- if (svt.getSimpleVT().SimpleTy == MVT::f32) {
- Src = DAG.getNode(opcode, DL, MVT::i32, Src);
- } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
- Src = DAG.getNode(opcode, DL, MVT::i64, Src);
- } else {
- assert(0 && "We only support 32 and 64bit fp types");
- }
- Src = DAG.getSExtOrTrunc(Src, DL, dvt);
- }
- return Src;
-}
-// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
-// condition.
-static AMDILCC::CondCodes
-CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
-{
- switch (CC) {
- default: {
- errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
- assert(0 && "Unknown condition code!");
- }
- case ISD::SETO:
- switch(type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_O;
- case MVT::f64:
- return AMDILCC::IL_CC_D_O;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETUO:
- switch(type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_UO;
- case MVT::f64:
- return AMDILCC::IL_CC_D_UO;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETGT:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_I_GT;
- case MVT::f32:
- return AMDILCC::IL_CC_F_GT;
- case MVT::f64:
- return AMDILCC::IL_CC_D_GT;
- case MVT::i64:
- return AMDILCC::IL_CC_L_GT;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETGE:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_I_GE;
- case MVT::f32:
- return AMDILCC::IL_CC_F_GE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_GE;
- case MVT::i64:
- return AMDILCC::IL_CC_L_GE;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETLT:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_I_LT;
- case MVT::f32:
- return AMDILCC::IL_CC_F_LT;
- case MVT::f64:
- return AMDILCC::IL_CC_D_LT;
- case MVT::i64:
- return AMDILCC::IL_CC_L_LT;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETLE:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_I_LE;
- case MVT::f32:
- return AMDILCC::IL_CC_F_LE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_LE;
- case MVT::i64:
- return AMDILCC::IL_CC_L_LE;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETNE:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_I_NE;
- case MVT::f32:
- return AMDILCC::IL_CC_F_NE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_NE;
- case MVT::i64:
- return AMDILCC::IL_CC_L_NE;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETEQ:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_I_EQ;
- case MVT::f32:
- return AMDILCC::IL_CC_F_EQ;
- case MVT::f64:
- return AMDILCC::IL_CC_D_EQ;
- case MVT::i64:
- return AMDILCC::IL_CC_L_EQ;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETUGT:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_U_GT;
- case MVT::f32:
- return AMDILCC::IL_CC_F_UGT;
- case MVT::f64:
- return AMDILCC::IL_CC_D_UGT;
- case MVT::i64:
- return AMDILCC::IL_CC_UL_GT;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETUGE:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_U_GE;
- case MVT::f32:
- return AMDILCC::IL_CC_F_UGE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_UGE;
- case MVT::i64:
- return AMDILCC::IL_CC_UL_GE;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETULT:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_U_LT;
- case MVT::f32:
- return AMDILCC::IL_CC_F_ULT;
- case MVT::f64:
- return AMDILCC::IL_CC_D_ULT;
- case MVT::i64:
- return AMDILCC::IL_CC_UL_LT;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETULE:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_U_LE;
- case MVT::f32:
- return AMDILCC::IL_CC_F_ULE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_ULE;
- case MVT::i64:
- return AMDILCC::IL_CC_UL_LE;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETUNE:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_U_NE;
- case MVT::f32:
- return AMDILCC::IL_CC_F_UNE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_UNE;
- case MVT::i64:
- return AMDILCC::IL_CC_UL_NE;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETUEQ:
- switch (type) {
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- return AMDILCC::IL_CC_U_EQ;
- case MVT::f32:
- return AMDILCC::IL_CC_F_UEQ;
- case MVT::f64:
- return AMDILCC::IL_CC_D_UEQ;
- case MVT::i64:
- return AMDILCC::IL_CC_UL_EQ;
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETOGT:
- switch (type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_OGT;
- case MVT::f64:
- return AMDILCC::IL_CC_D_OGT;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETOGE:
- switch (type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_OGE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_OGE;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETOLT:
- switch (type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_OLT;
- case MVT::f64:
- return AMDILCC::IL_CC_D_OLT;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETOLE:
- switch (type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_OLE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_OLE;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETONE:
- switch (type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_ONE;
- case MVT::f64:
- return AMDILCC::IL_CC_D_ONE;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- case ISD::SETOEQ:
- switch (type) {
- case MVT::f32:
- return AMDILCC::IL_CC_F_OEQ;
- case MVT::f64:
- return AMDILCC::IL_CC_D_OEQ;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- default:
- assert(0 && "Opcode combination not generated correctly!");
- return AMDILCC::COND_ERROR;
- };
- };
-}
-
-static unsigned int
-translateToOpcode(uint64_t CCCode, unsigned int regClass)
-{
- switch (CCCode) {
- case AMDILCC::IL_CC_D_EQ:
- case AMDILCC::IL_CC_D_OEQ:
- if (regClass == AMDIL::GPRV2F64RegClassID) {
- return (unsigned int)AMDIL::DEQ_v2f64;
- } else {
- return (unsigned int)AMDIL::DEQ;
- }
- case AMDILCC::IL_CC_D_LE:
- case AMDILCC::IL_CC_D_OLE:
- case AMDILCC::IL_CC_D_ULE:
- case AMDILCC::IL_CC_D_GE:
- case AMDILCC::IL_CC_D_OGE:
- case AMDILCC::IL_CC_D_UGE:
- return (unsigned int)AMDIL::DGE;
- case AMDILCC::IL_CC_D_LT:
- case AMDILCC::IL_CC_D_OLT:
- case AMDILCC::IL_CC_D_ULT:
- case AMDILCC::IL_CC_D_GT:
- case AMDILCC::IL_CC_D_OGT:
- case AMDILCC::IL_CC_D_UGT:
- return (unsigned int)AMDIL::DLT;
- case AMDILCC::IL_CC_D_NE:
- case AMDILCC::IL_CC_D_UNE:
- return (unsigned int)AMDIL::DNE;
- case AMDILCC::IL_CC_F_EQ:
- case AMDILCC::IL_CC_F_OEQ:
- return (unsigned int)AMDIL::FEQ;
- case AMDILCC::IL_CC_F_LE:
- case AMDILCC::IL_CC_F_ULE:
- case AMDILCC::IL_CC_F_OLE:
- case AMDILCC::IL_CC_F_GE:
- case AMDILCC::IL_CC_F_UGE:
- case AMDILCC::IL_CC_F_OGE:
- return (unsigned int)AMDIL::FGE;
- case AMDILCC::IL_CC_F_LT:
- case AMDILCC::IL_CC_F_OLT:
- case AMDILCC::IL_CC_F_ULT:
- case AMDILCC::IL_CC_F_GT:
- case AMDILCC::IL_CC_F_OGT:
- case AMDILCC::IL_CC_F_UGT:
- if (regClass == AMDIL::GPRV2F32RegClassID) {
- return (unsigned int)AMDIL::FLT_v2f32;
- } else if (regClass == AMDIL::GPRV4F32RegClassID) {
- return (unsigned int)AMDIL::FLT_v4f32;
- } else {
- return (unsigned int)AMDIL::FLT;
- }
- case AMDILCC::IL_CC_F_NE:
- case AMDILCC::IL_CC_F_UNE:
- return (unsigned int)AMDIL::FNE;
- case AMDILCC::IL_CC_I_EQ:
- case AMDILCC::IL_CC_U_EQ:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IEQ;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRV2I8RegClassID
- || regClass == AMDIL::GPRV2I16RegClassID) {
- return (unsigned int)AMDIL::IEQ_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRV4I8RegClassID
- || regClass == AMDIL::GPRV4I16RegClassID) {
- return (unsigned int)AMDIL::IEQ_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_L_EQ:
- case AMDILCC::IL_CC_UL_EQ:
- return (unsigned int)AMDIL::LEQ;
- case AMDILCC::IL_CC_I_GE:
- case AMDILCC::IL_CC_I_LE:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IGE;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IGE_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::IGE_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_I_LT:
- case AMDILCC::IL_CC_I_GT:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ILT;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ILT_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ILT_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_L_GE:
- return (unsigned int)AMDIL::LGE;
- case AMDILCC::IL_CC_L_LE:
- return (unsigned int)AMDIL::LLE;
- case AMDILCC::IL_CC_L_LT:
- return (unsigned int)AMDIL::LLT;
- case AMDILCC::IL_CC_L_GT:
- return (unsigned int)AMDIL::LGT;
- case AMDILCC::IL_CC_I_NE:
- case AMDILCC::IL_CC_U_NE:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::INE;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::INE_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::INE_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_U_GE:
- case AMDILCC::IL_CC_U_LE:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGE;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGE_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGE_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_L_NE:
- case AMDILCC::IL_CC_UL_NE:
- return (unsigned int)AMDIL::LNE;
- case AMDILCC::IL_CC_UL_GE:
- return (unsigned int)AMDIL::ULGE;
- case AMDILCC::IL_CC_UL_LE:
- return (unsigned int)AMDIL::ULLE;
- case AMDILCC::IL_CC_U_LT:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ULT;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ULT_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::ULT_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_U_GT:
- if (regClass == AMDIL::GPRI32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGT;
- } else if (regClass == AMDIL::GPRV2I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGT_v2i32;
- } else if (regClass == AMDIL::GPRV4I32RegClassID
- || regClass == AMDIL::GPRI8RegClassID
- || regClass == AMDIL::GPRI16RegClassID) {
- return (unsigned int)AMDIL::UGT_v4i32;
- } else {
- assert(!"Unknown reg class!");
- }
- case AMDILCC::IL_CC_UL_LT:
- return (unsigned int)AMDIL::ULLT;
- case AMDILCC::IL_CC_UL_GT:
- return (unsigned int)AMDIL::ULGT;
- case AMDILCC::IL_CC_F_UEQ:
- case AMDILCC::IL_CC_D_UEQ:
- case AMDILCC::IL_CC_F_ONE:
- case AMDILCC::IL_CC_D_ONE:
- case AMDILCC::IL_CC_F_O:
- case AMDILCC::IL_CC_F_UO:
- case AMDILCC::IL_CC_D_O:
- case AMDILCC::IL_CC_D_UO:
- // we don't care
- return 0;
-
- }
- errs()<<"Opcode: "<<CCCode<<"\n";
- assert(0 && "Unknown opcode retrieved");
- return 0;
-}
SDValue
AMDILTargetLowering::LowerMemArgument(
SDValue Chain,
@@ -650,7 +70,7 @@
// could be overwritten by lowering of arguments in case of a tail call.
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable
- );
+ );
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
@@ -662,418 +82,6 @@
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Instruction generation functions
-//===----------------------------------------------------------------------===//
-uint32_t
-AMDILTargetLowering::addExtensionInstructions(
- uint32_t reg, bool signedShift,
- unsigned int simpleVT) const
-{
- int shiftSize = 0;
- uint32_t LShift, RShift, Promote, Demote;
- uint32_t intRegClassID;
- switch(simpleVT) {
- default:
- return reg;
- case AMDIL::GPRI8RegClassID:
- shiftSize = 24;
- LShift = AMDIL::SHL_i32;
- if (signedShift) {
- RShift = AMDIL::SHR_i32;
- } else {
- RShift = AMDIL::USHR_i32;
- }
- Promote = AMDIL::IL_ASINT_i8;
- Demote = AMDIL::IL_ASCHAR_i32;
- intRegClassID = AMDIL::GPRI32RegClassID;
- break;
- case AMDIL::GPRV2I8RegClassID:
- shiftSize = 24;
- LShift = AMDIL::SHL_v2i32;
- if (signedShift) {
- RShift = AMDIL::SHR_v2i32;
- } else {
- RShift = AMDIL::USHR_v2i32;
- }
- Promote = AMDIL::IL_ASV2INT_v2i8;
- Demote = AMDIL::IL_ASV2CHAR_v2i32;
- intRegClassID = AMDIL::GPRV2I32RegClassID;
- break;
- case AMDIL::GPRV4I8RegClassID:
- shiftSize = 24;
- LShift = AMDIL::SHL_v4i32;
- if (signedShift) {
- RShift = AMDIL::SHR_v4i32;
- } else {
- RShift = AMDIL::USHR_v4i32;
- }
- Promote = AMDIL::IL_ASV4INT_v4i8;
- Demote = AMDIL::IL_ASV4CHAR_v4i32;
- intRegClassID = AMDIL::GPRV4I32RegClassID;
- break;
- case AMDIL::GPRI16RegClassID:
- shiftSize = 16;
- LShift = AMDIL::SHL_i32;
- if (signedShift) {
- RShift = AMDIL::SHR_i32;
- } else {
- RShift = AMDIL::USHR_i32;
- }
- Promote = AMDIL::IL_ASINT_i16;
- Demote = AMDIL::IL_ASSHORT_i32;
- intRegClassID = AMDIL::GPRI32RegClassID;
- break;
- case AMDIL::GPRV2I16RegClassID:
- shiftSize = 16;
- LShift = AMDIL::SHL_v2i32;
- if (signedShift) {
- RShift = AMDIL::SHR_v2i32;
- } else {
- RShift = AMDIL::USHR_v2i32;
- }
- Promote = AMDIL::IL_ASV2INT_v2i16;
- Demote = AMDIL::IL_ASV2SHORT_v2i32;
- intRegClassID = AMDIL::GPRV2I32RegClassID;
- break;
- case AMDIL::GPRV4I16RegClassID:
- shiftSize = 16;
- LShift = AMDIL::SHL_v4i32;
- if (signedShift) {
- RShift = AMDIL::SHR_v4i32;
- } else {
- RShift = AMDIL::USHR_v4i32;
- }
- Promote = AMDIL::IL_ASV4INT_v4i16;
- Demote = AMDIL::IL_ASV4SHORT_v4i32;
- intRegClassID = AMDIL::GPRV4I32RegClassID;
- break;
- };
- uint32_t LoadReg = genVReg(simpleVT);
- uint32_t tmp1 = genVReg(intRegClassID);
- uint32_t tmp2 = genVReg(intRegClassID);
- uint32_t tmp3 = genVReg(intRegClassID);
- uint32_t dst = genVReg(simpleVT);
- generateMachineInst(Promote, tmp1, reg);
- generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
- generateMachineInst(LShift, tmp2, tmp1, LoadReg);
- generateMachineInst(RShift, tmp3, tmp2, LoadReg);
- generateMachineInst(Demote, dst, tmp3);
- return dst;
-}
-
-MachineOperand
-AMDILTargetLowering::convertToReg(MachineOperand op) const
-{
- if (op.isReg()) {
- return op;
- } else if (op.isImm()) {
- uint32_t loadReg
- = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
- generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
- .addImm(op.getImm());
- op.ChangeToRegister(loadReg, false);
- } else if (op.isFPImm()) {
- uint32_t loadReg
- = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
- generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
- .addFPImm(op.getFPImm());
- op.ChangeToRegister(loadReg, false);
- } else if (op.isMBB()) {
- op.ChangeToRegister(0, false);
- } else if (op.isFI()) {
- op.ChangeToRegister(0, false);
- } else if (op.isCPI()) {
- op.ChangeToRegister(0, false);
- } else if (op.isJTI()) {
- op.ChangeToRegister(0, false);
- } else if (op.isGlobal()) {
- op.ChangeToRegister(0, false);
- } else if (op.isSymbol()) {
- op.ChangeToRegister(0, false);
- }/* else if (op.isMetadata()) {
- op.ChangeToRegister(0, false);
- }*/
- return op;
-}
-
-void
-AMDILTargetLowering::generateCMPInstr(
- MachineInstr *MI,
- MachineBasicBlock *BB,
- const TargetInstrInfo& TII)
-const
-{
- MachineOperand DST = MI->getOperand(0);
- MachineOperand CC = MI->getOperand(1);
- MachineOperand LHS = MI->getOperand(2);
- MachineOperand RHS = MI->getOperand(3);
- int64_t ccCode = CC.getImm();
- unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
- unsigned int opCode = translateToOpcode(ccCode, simpleVT);
- DebugLoc DL = MI->getDebugLoc();
- MachineBasicBlock::iterator BBI = MI;
- setPrivateData(BB, BBI, &DL, &TII);
- if (!LHS.isReg()) {
- LHS = convertToReg(LHS);
- }
- if (!RHS.isReg()) {
- RHS = convertToReg(RHS);
- }
- uint32_t lhsreg = LHS.getReg();
- uint32_t rhsreg = RHS.getReg();
- switch (ccCode) {
- case AMDILCC::IL_CC_I_EQ:
- case AMDILCC::IL_CC_I_NE:
- case AMDILCC::IL_CC_I_GE:
- case AMDILCC::IL_CC_I_LT:
- case AMDILCC::IL_CC_I_GT:
- case AMDILCC::IL_CC_I_LE: {
- lhsreg = addExtensionInstructions(lhsreg, true, simpleVT);
- rhsreg = addExtensionInstructions(rhsreg, true, simpleVT);
- }
- break;
- case AMDILCC::IL_CC_U_EQ:
- case AMDILCC::IL_CC_U_NE:
- case AMDILCC::IL_CC_U_GE:
- case AMDILCC::IL_CC_U_LT:
- case AMDILCC::IL_CC_U_GT:
- case AMDILCC::IL_CC_U_LE: {
- lhsreg = addExtensionInstructions(lhsreg, false, simpleVT);
- rhsreg = addExtensionInstructions(rhsreg, false, simpleVT);
- }
- break;
- }
-
- switch (ccCode) {
- case AMDILCC::IL_CC_I_EQ:
- case AMDILCC::IL_CC_I_NE:
- case AMDILCC::IL_CC_I_GE:
- case AMDILCC::IL_CC_I_LT:
- case AMDILCC::IL_CC_U_EQ:
- case AMDILCC::IL_CC_U_NE:
- case AMDILCC::IL_CC_U_GE:
- case AMDILCC::IL_CC_U_LT:
- case AMDILCC::IL_CC_D_EQ:
- case AMDILCC::IL_CC_F_EQ:
- case AMDILCC::IL_CC_F_OEQ:
- case AMDILCC::IL_CC_D_OEQ:
- case AMDILCC::IL_CC_D_NE:
- case AMDILCC::IL_CC_F_NE:
- case AMDILCC::IL_CC_F_UNE:
- case AMDILCC::IL_CC_D_UNE:
- case AMDILCC::IL_CC_D_GE:
- case AMDILCC::IL_CC_F_GE:
- case AMDILCC::IL_CC_D_OGE:
- case AMDILCC::IL_CC_F_OGE:
- case AMDILCC::IL_CC_D_LT:
- case AMDILCC::IL_CC_F_LT:
- case AMDILCC::IL_CC_F_OLT:
- case AMDILCC::IL_CC_D_OLT:
- generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
- break;
- case AMDILCC::IL_CC_I_GT:
- case AMDILCC::IL_CC_I_LE:
- case AMDILCC::IL_CC_U_GT:
- case AMDILCC::IL_CC_U_LE:
- case AMDILCC::IL_CC_F_GT:
- case AMDILCC::IL_CC_D_GT:
- case AMDILCC::IL_CC_F_OGT:
- case AMDILCC::IL_CC_D_OGT:
- case AMDILCC::IL_CC_F_LE:
- case AMDILCC::IL_CC_D_LE:
- case AMDILCC::IL_CC_D_OLE:
- case AMDILCC::IL_CC_F_OLE:
- generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
- break;
- case AMDILCC::IL_CC_F_UGT:
- case AMDILCC::IL_CC_F_ULE: {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(opCode, VReg[0], rhsreg, lhsreg);
- generateMachineInst(AMDIL::FNE, VReg[1], rhsreg, rhsreg);
- generateMachineInst(AMDIL::FNE, VReg[2], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_F_ULT:
- case AMDILCC::IL_CC_F_UGE: {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(opCode, VReg[0], lhsreg, rhsreg);
- generateMachineInst(AMDIL::FNE, VReg[1], rhsreg, rhsreg);
- generateMachineInst(AMDIL::FNE, VReg[2], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_D_UGT:
- case AMDILCC::IL_CC_D_ULE: {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(opCode, VReg[0], rhsreg, lhsreg);
- generateMachineInst(AMDIL::DNE, VReg[1], rhsreg, rhsreg);
- generateMachineInst(AMDIL::DNE, VReg[2], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_D_UGE:
- case AMDILCC::IL_CC_D_ULT: {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(opCode, VReg[0], lhsreg, rhsreg);
- generateMachineInst(AMDIL::DNE, VReg[1], rhsreg, rhsreg);
- generateMachineInst(AMDIL::DNE, VReg[2], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_F_UEQ: {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(AMDIL::FEQ, VReg[0], lhsreg, rhsreg);
- generateMachineInst(AMDIL::FNE, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::FNE, VReg[2], rhsreg, rhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_F_ONE: {
- uint32_t VReg[4] = {
- genVReg(simpleVT), genVReg(simpleVT),
- genVReg(simpleVT), genVReg(simpleVT)
- };
- generateMachineInst(AMDIL::FNE, VReg[0], lhsreg, rhsreg);
- generateMachineInst(AMDIL::FEQ, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::FEQ, VReg[2], rhsreg, rhsreg);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[2], VReg[3]);
- }
- break;
- case AMDILCC::IL_CC_D_UEQ: {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DEQ, VReg[0], lhsreg, rhsreg);
- generateMachineInst(AMDIL::DNE, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::DNE, VReg[2], rhsreg, rhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[2], VReg[3]);
-
- }
- break;
- case AMDILCC::IL_CC_D_ONE: {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[4] = {
- genVReg(regID), genVReg(regID),
- genVReg(regID), genVReg(regID)
- };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DNE, VReg[0], lhsreg, rhsreg);
- generateMachineInst(AMDIL::DEQ, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::DEQ, VReg[2], rhsreg, rhsreg);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- VReg[3], VReg[0], VReg[1]);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[2], VReg[3]);
-
- }
- break;
- case AMDILCC::IL_CC_F_O: {
- uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
- generateMachineInst(AMDIL::FEQ, VReg[0], rhsreg, rhsreg);
- generateMachineInst(AMDIL::FEQ, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_D_O: {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DEQ, VReg[0], rhsreg, rhsreg);
- generateMachineInst(AMDIL::DEQ, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_AND_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_F_UO: {
- uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
- generateMachineInst(AMDIL::FNE, VReg[0], rhsreg, rhsreg);
- generateMachineInst(AMDIL::FNE, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_D_UO: {
- uint32_t regID = AMDIL::GPRF64RegClassID;
- uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
- // The result of a double comparison is a 32bit result
- generateMachineInst(AMDIL::DNE, VReg[0], rhsreg, rhsreg);
- generateMachineInst(AMDIL::DNE, VReg[1], lhsreg, lhsreg);
- generateMachineInst(AMDIL::BINARY_OR_f32,
- DST.getReg(), VReg[0], VReg[1]);
- }
- break;
- case AMDILCC::IL_CC_L_LE:
- case AMDILCC::IL_CC_L_GE:
- case AMDILCC::IL_CC_L_EQ:
- case AMDILCC::IL_CC_L_NE:
- case AMDILCC::IL_CC_L_LT:
- case AMDILCC::IL_CC_L_GT:
- case AMDILCC::IL_CC_UL_LE:
- case AMDILCC::IL_CC_UL_GE:
- case AMDILCC::IL_CC_UL_EQ:
- case AMDILCC::IL_CC_UL_NE:
- case AMDILCC::IL_CC_UL_LT:
- case AMDILCC::IL_CC_UL_GT: {
- const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
- if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
- generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
- } else {
- generateLongRelational(MI, opCode);
- }
- }
- break;
- case AMDILCC::COND_ERROR:
- assert(0 && "Invalid CC code");
- break;
- };
-}
//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
@@ -1082,7 +90,9 @@
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
setBooleanVectorContents( ZeroOrNegativeOneBooleanContent );
- int types[] = {
+ setBooleanContents( ZeroOrNegativeOneBooleanContent );
+ int types[] =
+ {
(int)MVT::i8,
(int)MVT::i16,
(int)MVT::i32,
@@ -1101,19 +111,22 @@
(int)MVT::v2i64
};
- int IntTypes[] = {
+ int IntTypes[] =
+ {
(int)MVT::i8,
(int)MVT::i16,
(int)MVT::i32,
(int)MVT::i64
};
- int FloatTypes[] = {
+ int FloatTypes[] =
+ {
(int)MVT::f32,
(int)MVT::f64
};
- int VectorTypes[] = {
+ int VectorTypes[] =
+ {
(int)MVT::v2i8,
(int)MVT::v4i8,
(int)MVT::v2i16,
@@ -1131,35 +144,56 @@
size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ &this->getTargetMachine())->getSubtargetImpl();
+ uint32_t promoteOpCodes[] = {
+ ISD::AND, ISD::XOR, ISD::OR, ISD::SETCC, ISD::SDIV, ISD::SREM, ISD::UDIV,
+ ISD::UREM,
+ ISD::SHL, ISD::SRL, ISD::SRA
+ };
// These are the current register classes that are
// supported
-
addRegisterClass(MVT::i32, &AMDIL::GPRI32RegClass);
addRegisterClass(MVT::f32, &AMDIL::GPRF32RegClass);
if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
- addRegisterClass(MVT::f64, &AMDIL::GPRF64RegClass);
+ addRegisterClass(MVT::f64, &AMDIL::GPRF64RegClass);
addRegisterClass(MVT::v2f64, &AMDIL::GPRV2F64RegClass);
}
if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
- addRegisterClass(MVT::i8, &AMDIL::GPRI8RegClass);
+ addRegisterClass(MVT::i8, &AMDIL::GPRI8RegClass);
addRegisterClass(MVT::v2i8, &AMDIL::GPRV2I8RegClass);
addRegisterClass(MVT::v4i8, &AMDIL::GPRV4I8RegClass);
- setOperationAction(ISD::Constant , MVT::i8 , Legal);
+ setOperationAction(ISD::Constant, MVT::i8, Legal);
+ /*
+ for (unsigned x = 0, y = sizeof(promoteOpCodes)/sizeof(uint32_t); x < y; ++x) {
+ setOperationAction(promoteOpCodes[x], MVT::i8, Promote);
+ setOperationAction(promoteOpCodes[x], MVT::v2i8, Promote);
+ setOperationAction(promoteOpCodes[x], MVT::v4i8, Promote);
+ AddPromotedToType(promoteOpCodes[x], MVT::v2i8, MVT::v2i32);
+ AddPromotedToType(promoteOpCodes[x], MVT::v4i8, MVT::v4i32);
+ }
+ */
}
if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
- addRegisterClass(MVT::i16, &AMDIL::GPRI16RegClass);
+ addRegisterClass(MVT::i16, &AMDIL::GPRI16RegClass);
addRegisterClass(MVT::v2i16, &AMDIL::GPRV2I16RegClass);
addRegisterClass(MVT::v4i16, &AMDIL::GPRV4I16RegClass);
- setOperationAction(ISD::Constant , MVT::i16 , Legal);
+ setOperationAction(ISD::Constant, MVT::i16, Legal);
+ /*
+ for (unsigned x = 0, y = sizeof(promoteOpCodes)/sizeof(uint32_t); x < y; ++x) {
+ setOperationAction(promoteOpCodes[x], MVT::i16, Promote);
+ setOperationAction(promoteOpCodes[x], MVT::v2i16, Promote);
+ setOperationAction(promoteOpCodes[x], MVT::v4i16, Promote);
+ AddPromotedToType(promoteOpCodes[x], MVT::v2i16, MVT::v2i32);
+ }
+ */
}
addRegisterClass(MVT::v2f32, &AMDIL::GPRV2F32RegClass);
addRegisterClass(MVT::v4f32, &AMDIL::GPRV4F32RegClass);
addRegisterClass(MVT::v2i32, &AMDIL::GPRV2I32RegClass);
addRegisterClass(MVT::v4i32, &AMDIL::GPRV4I32RegClass);
if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
- addRegisterClass(MVT::i64, &AMDIL::GPRI64RegClass);
+ addRegisterClass(MVT::i64, &AMDIL::GPRI64RegClass);
addRegisterClass(MVT::v2i64, &AMDIL::GPRV2I64RegClass);
}
@@ -1168,7 +202,7 @@
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FLOG , MVT::f32, Legal);
+ setOperationAction(ISD::FLOG, MVT::f32, Legal);
// Set explicitly to expand in case default changes
setOperationAction(ISD::FRINT, MVT::f32, Expand);
@@ -1180,17 +214,17 @@
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Expand);
- setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::BRCOND, VT, Custom);
- setOperationAction(ISD::BR_CC, VT, Custom);
setOperationAction(ISD::BR_JT, VT, Expand);
+ // TODO: This should only be for integer/f64 types,
+ // f32 types can use the if_relop instruction.
+ setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::BRIND, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
// TODO: Implement custom UREM/SREM routines
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
@@ -1202,14 +236,42 @@
setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::JumpTable, VT, Custom);
setOperationAction(ISD::ConstantPool, VT, Custom);
- setOperationAction(ISD::SELECT_CC, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ /*
+ setCondCodeAction(ISD::SETGT, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ */
+ setCondCodeAction(ISD::SETONE, VT, Expand);
+ setCondCodeAction(ISD::SETUEQ, VT, Expand);
+ setCondCodeAction(ISD::SETO, VT, Expand);
+ setCondCodeAction(ISD::SETUO, VT, Expand);
+ // FIXME: Need to support this instruction!
+ setOperationAction(ISD::VSELECT, VT, Expand);
if (VT != MVT::i64 && VT != MVT::v2i64) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
}
+ MVT xVT = MVT(VT);
+ if (xVT.isInteger()) {
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETOGT, VT, Expand);
+ setCondCodeAction(ISD::SETOLE, VT, Expand);
+ setCondCodeAction(ISD::SETOGE, VT, Expand);
+ setCondCodeAction(ISD::SETOLT, VT, Expand);
+ setCondCodeAction(ISD::SETOEQ, VT, Expand);
+ setCondCodeAction(ISD::SETUNE, VT, Expand);
+ }
+ /*
+ for (unsigned y = 0; y < numTypes; ++y) {
+ MVT::SimpleValueType DVT = (MVT::SimpleValueType)types[y];
+ setTruncStoreAction(VT, DVT, Expand);
+ }
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+ */
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
@@ -1219,14 +281,8 @@
// IL does not have these operations for floating point types
setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::SETOLT, VT, Expand);
- setOperationAction(ISD::SETOGE, VT, Expand);
- setOperationAction(ISD::SETOGT, VT, Expand);
- setOperationAction(ISD::SETOLE, VT, Expand);
- setOperationAction(ISD::SETULT, VT, Expand);
- setOperationAction(ISD::SETUGE, VT, Expand);
- setOperationAction(ISD::SETUGT, VT, Expand);
- setOperationAction(ISD::SETULE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
}
for (unsigned int x = 0; x < numIntTypes; ++x) {
@@ -1252,7 +308,8 @@
setOperationAction(ISD::CTLZ, VT, Expand);
}
- for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) {
+ for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
+ {
MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -1264,18 +321,17 @@
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- // setOperationAction(ISD::VSETCC, VT, Expand);
- setOperationAction(ISD::SETCC, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
-
}
+
setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
- if (stm->calVersion() < CAL_VERSION_SC_139
- || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
+ if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::i64, Custom);
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Expand);
@@ -1284,9 +340,12 @@
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
- setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::Constant, MVT::i64, Legal);
setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
@@ -1296,6 +355,7 @@
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
}
if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
// we support loading/storing v2f64 but not operations on the type
@@ -1305,7 +365,7 @@
setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
// We want to expand vector conversions into their scalar
// counterparts.
@@ -1319,6 +379,7 @@
setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
}
// TODO: Fix the UDIV24 algorithm so it works for these
// types correctly. This needs vector comparisons
@@ -1332,10 +393,9 @@
setOperationAction(ISD::ADDE, MVT::Other, Expand);
setOperationAction(ISD::ADDC, MVT::Other, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::BR_CC, MVT::Other, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::SETCC, MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
@@ -1343,15 +403,15 @@
setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
// Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
- setOperationAction(ISD::Constant , MVT::i32 , Legal);
- setOperationAction(ISD::TRAP , MVT::Other , Legal);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::Constant, MVT::i32, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
setStackPointerRegisterToSaveRestore(AMDIL::SP);
setSchedulingPreference(Sched::RegPressure);
@@ -1370,7 +430,6 @@
#undef numVectorTypes
#undef numFloatTypes
}
-
// This only works for region/local/global address spaces on EG/NI as
// the other address spaces required 128 bit alignement of loads/stores.
// However, there is no way to disable for those address spaces
@@ -1390,332 +449,157 @@
|| VT == MVT::f64 || VT == MVT::i64
|| VT == MVT::v2f64 || VT == MVT::v2i64);
}
-
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
switch (Opcode) {
- default:
- return 0;
- case AMDILISD::INTTOANY:
- return "AMDILISD::INTTOANY";
- case AMDILISD::DP_TO_FP:
- return "AMDILISD::DP_TO_FP";
- case AMDILISD::FP_TO_DP:
- return "AMDILISD::FP_TO_DP";
- case AMDILISD::BITCONV:
- return "AMDILISD::BITCONV";
- case AMDILISD::CMOV:
- return "AMDILISD::CMOV";
- case AMDILISD::CMOVLOG:
- return "AMDILISD::CMOVLOG";
- case AMDILISD::INEGATE:
- return "AMDILISD::INEGATE";
- case AMDILISD::MAD:
- return "AMDILISD::MAD";
- case AMDILISD::UMAD:
- return "AMDILISD::UMAD";
- case AMDILISD::CALL:
- return "AMDILISD::CALL";
- case AMDILISD::RET:
- return "AMDILISD::RET";
- case AMDILISD::IFFB_HI:
- return "AMDILISD::IFFB_HI";
- case AMDILISD::IFFB_LO:
- return "AMDILISD::IFFB_LO";
- case AMDILISD::ADD:
- return "AMDILISD::ADD";
- case AMDILISD::UMUL:
- return "AMDILISD::UMUL";
- case AMDILISD::AND:
- return "AMDILISD::AND";
- case AMDILISD::OR:
- return "AMDILISD::OR";
- case AMDILISD::NOT:
- return "AMDILISD::NOT";
- case AMDILISD::XOR:
- return "AMDILISD::XOR";
- case AMDILISD::DIV_INF:
- return "AMDILISD::DIV_INF";
- case AMDILISD::SMAX:
- return "AMDILISD::SMAX";
- case AMDILISD::PHIMOVE:
- return "AMDILISD::PHIMOVE";
- case AMDILISD::MOVE:
- return "AMDILISD::MOVE";
- case AMDILISD::VBUILD:
- return "AMDILISD::VBUILD";
- case AMDILISD::VEXTRACT:
- return "AMDILISD::VEXTRACT";
- case AMDILISD::VINSERT:
- return "AMDILISD::VINSERT";
- case AMDILISD::VCONCAT:
- return "AMDILISD::VCONCAT";
- case AMDILISD::LCREATE:
- return "AMDILISD::LCREATE";
- case AMDILISD::LCOMPHI:
- return "AMDILISD::LCOMPHI";
- case AMDILISD::LCOMPLO:
- return "AMDILISD::LCOMPLO";
- case AMDILISD::DCREATE:
- return "AMDILISD::DCREATE";
- case AMDILISD::DCOMPHI:
- return "AMDILISD::DCOMPHI";
- case AMDILISD::DCOMPLO:
- return "AMDILISD::DCOMPLO";
- case AMDILISD::LCREATE2:
- return "AMDILISD::LCREATE2";
- case AMDILISD::LCOMPHI2:
- return "AMDILISD::LCOMPHI2";
- case AMDILISD::LCOMPLO2:
- return "AMDILISD::LCOMPLO2";
- case AMDILISD::DCREATE2:
- return "AMDILISD::DCREATE2";
- case AMDILISD::DCOMPHI2:
- return "AMDILISD::DCOMPHI2";
- case AMDILISD::DCOMPLO2:
- return "AMDILISD::DCOMPLO2";
- case AMDILISD::CMP:
- return "AMDILISD::CMP";
- case AMDILISD::IL_CC_I_LT:
- return "AMDILISD::IL_CC_I_LT";
- case AMDILISD::IL_CC_I_LE:
- return "AMDILISD::IL_CC_I_LE";
- case AMDILISD::IL_CC_I_GT:
- return "AMDILISD::IL_CC_I_GT";
- case AMDILISD::IL_CC_I_GE:
- return "AMDILISD::IL_CC_I_GE";
- case AMDILISD::IL_CC_I_EQ:
- return "AMDILISD::IL_CC_I_EQ";
- case AMDILISD::IL_CC_I_NE:
- return "AMDILISD::IL_CC_I_NE";
- case AMDILISD::RET_FLAG:
- return "AMDILISD::RET_FLAG";
- case AMDILISD::BRANCH_COND:
- return "AMDILISD::BRANCH_COND";
- case AMDILISD::LOOP_NZERO:
- return "AMDILISD::LOOP_NZERO";
- case AMDILISD::LOOP_ZERO:
- return "AMDILISD::LOOP_ZERO";
- case AMDILISD::LOOP_CMP:
- return "AMDILISD::LOOP_CMP";
- case AMDILISD::ADDADDR:
- return "AMDILISD::ADDADDR";
- case AMDILISD::ATOM_F_ADD:
- return "AMDILISD::ATOM_F_ADD";
- case AMDILISD::ATOM_F_AND:
- return "AMDILISD::ATOM_F_AND";
- case AMDILISD::ATOM_F_CMPXCHG:
- return "AMDILISD::ATOM_F_CMPXCHG";
- case AMDILISD::ATOM_F_DEC:
- return "AMDILISD::ATOM_F_DEC";
- case AMDILISD::ATOM_F_INC:
- return "AMDILISD::ATOM_F_INC";
- case AMDILISD::ATOM_F_MAX:
- return "AMDILISD::ATOM_F_MAX";
- case AMDILISD::ATOM_F_UMAX:
- return "AMDILISD::ATOM_F_UMAX";
- case AMDILISD::ATOM_F_MIN:
- return "AMDILISD::ATOM_F_MIN";
- case AMDILISD::ATOM_F_UMIN:
- return "AMDILISD::ATOM_F_UMIN";
- case AMDILISD::ATOM_F_OR:
- return "AMDILISD::ATOM_F_OR";
- case AMDILISD::ATOM_F_SUB:
- return "AMDILISD::ATOM_F_SUB";
- case AMDILISD::ATOM_F_XCHG:
- return "AMDILISD::ATOM_F_XCHG";
- case AMDILISD::ATOM_F_XOR:
- return "AMDILISD::ATOM_F_XOR";
- case AMDILISD::ATOM_G_ADD:
- return "AMDILISD::ATOM_G_ADD";
- case AMDILISD::ATOM_G_AND:
- return "AMDILISD::ATOM_G_AND";
- case AMDILISD::ATOM_G_CMPXCHG:
- return "AMDILISD::ATOM_G_CMPXCHG";
- case AMDILISD::ATOM_G_DEC:
- return "AMDILISD::ATOM_G_DEC";
- case AMDILISD::ATOM_G_INC:
- return "AMDILISD::ATOM_G_INC";
- case AMDILISD::ATOM_G_MAX:
- return "AMDILISD::ATOM_G_MAX";
- case AMDILISD::ATOM_G_UMAX:
- return "AMDILISD::ATOM_G_UMAX";
- case AMDILISD::ATOM_G_MIN:
- return "AMDILISD::ATOM_G_MIN";
- case AMDILISD::ATOM_G_UMIN:
- return "AMDILISD::ATOM_G_UMIN";
- case AMDILISD::ATOM_G_OR:
- return "AMDILISD::ATOM_G_OR";
- case AMDILISD::ATOM_G_SUB:
- return "AMDILISD::ATOM_G_SUB";
- case AMDILISD::ATOM_G_RSUB:
- return "AMDILISD::ATOM_G_RSUB";
- case AMDILISD::ATOM_G_XCHG:
- return "AMDILISD::ATOM_G_XCHG";
- case AMDILISD::ATOM_G_XOR:
- return "AMDILISD::ATOM_G_XOR";
- case AMDILISD::ATOM_G_ADD_NORET:
- return "AMDILISD::ATOM_G_ADD_NORET";
- case AMDILISD::ATOM_G_AND_NORET:
- return "AMDILISD::ATOM_G_AND_NORET";
- case AMDILISD::ATOM_G_CMPXCHG_NORET:
- return "AMDILISD::ATOM_G_CMPXCHG_NORET";
- case AMDILISD::ATOM_G_DEC_NORET:
- return "AMDILISD::ATOM_G_DEC_NORET";
- case AMDILISD::ATOM_G_INC_NORET:
- return "AMDILISD::ATOM_G_INC_NORET";
- case AMDILISD::ATOM_G_MAX_NORET:
- return "AMDILISD::ATOM_G_MAX_NORET";
- case AMDILISD::ATOM_G_UMAX_NORET:
- return "AMDILISD::ATOM_G_UMAX_NORET";
- case AMDILISD::ATOM_G_MIN_NORET:
- return "AMDILISD::ATOM_G_MIN_NORET";
- case AMDILISD::ATOM_G_UMIN_NORET:
- return "AMDILISD::ATOM_G_UMIN_NORET";
- case AMDILISD::ATOM_G_OR_NORET:
- return "AMDILISD::ATOM_G_OR_NORET";
- case AMDILISD::ATOM_G_SUB_NORET:
- return "AMDILISD::ATOM_G_SUB_NORET";
- case AMDILISD::ATOM_G_RSUB_NORET:
- return "AMDILISD::ATOM_G_RSUB_NORET";
- case AMDILISD::ATOM_G_XCHG_NORET:
- return "AMDILISD::ATOM_G_XCHG_NORET";
- case AMDILISD::ATOM_G_XOR_NORET:
- return "AMDILISD::ATOM_G_XOR_NORET";
- case AMDILISD::ATOM_L_ADD:
- return "AMDILISD::ATOM_L_ADD";
- case AMDILISD::ATOM_L_AND:
- return "AMDILISD::ATOM_L_AND";
- case AMDILISD::ATOM_L_CMPXCHG:
- return "AMDILISD::ATOM_L_CMPXCHG";
- case AMDILISD::ATOM_L_DEC:
- return "AMDILISD::ATOM_L_DEC";
- case AMDILISD::ATOM_L_INC:
- return "AMDILISD::ATOM_L_INC";
- case AMDILISD::ATOM_L_MAX:
- return "AMDILISD::ATOM_L_MAX";
- case AMDILISD::ATOM_L_UMAX:
- return "AMDILISD::ATOM_L_UMAX";
- case AMDILISD::ATOM_L_MIN:
- return "AMDILISD::ATOM_L_MIN";
- case AMDILISD::ATOM_L_UMIN:
- return "AMDILISD::ATOM_L_UMIN";
- case AMDILISD::ATOM_L_OR:
- return "AMDILISD::ATOM_L_OR";
- case AMDILISD::ATOM_L_SUB:
- return "AMDILISD::ATOM_L_SUB";
- case AMDILISD::ATOM_L_RSUB:
- return "AMDILISD::ATOM_L_RSUB";
- case AMDILISD::ATOM_L_XCHG:
- return "AMDILISD::ATOM_L_XCHG";
- case AMDILISD::ATOM_L_XOR:
- return "AMDILISD::ATOM_L_XOR";
- case AMDILISD::ATOM_L_ADD_NORET:
- return "AMDILISD::ATOM_L_ADD_NORET";
- case AMDILISD::ATOM_L_AND_NORET:
- return "AMDILISD::ATOM_L_AND_NORET";
- case AMDILISD::ATOM_L_CMPXCHG_NORET:
- return "AMDILISD::ATOM_L_CMPXCHG_NORET";
- case AMDILISD::ATOM_L_DEC_NORET:
- return "AMDILISD::ATOM_L_DEC_NORET";
- case AMDILISD::ATOM_L_INC_NORET:
- return "AMDILISD::ATOM_L_INC_NORET";
- case AMDILISD::ATOM_L_MAX_NORET:
- return "AMDILISD::ATOM_L_MAX_NORET";
- case AMDILISD::ATOM_L_UMAX_NORET:
- return "AMDILISD::ATOM_L_UMAX_NORET";
- case AMDILISD::ATOM_L_MIN_NORET:
- return "AMDILISD::ATOM_L_MIN_NORET";
- case AMDILISD::ATOM_L_UMIN_NORET:
- return "AMDILISD::ATOM_L_UMIN_NORET";
- case AMDILISD::ATOM_L_OR_NORET:
- return "AMDILISD::ATOM_L_OR_NORET";
- case AMDILISD::ATOM_L_SUB_NORET:
- return "AMDILISD::ATOM_L_SUB_NORET";
- case AMDILISD::ATOM_L_RSUB_NORET:
- return "AMDILISD::ATOM_L_RSUB_NORET";
- case AMDILISD::ATOM_L_XCHG_NORET:
- return "AMDILISD::ATOM_L_XCHG_NORET";
- case AMDILISD::ATOM_R_ADD:
- return "AMDILISD::ATOM_R_ADD";
- case AMDILISD::ATOM_R_AND:
- return "AMDILISD::ATOM_R_AND";
- case AMDILISD::ATOM_R_CMPXCHG:
- return "AMDILISD::ATOM_R_CMPXCHG";
- case AMDILISD::ATOM_R_DEC:
- return "AMDILISD::ATOM_R_DEC";
- case AMDILISD::ATOM_R_INC:
- return "AMDILISD::ATOM_R_INC";
- case AMDILISD::ATOM_R_MAX:
- return "AMDILISD::ATOM_R_MAX";
- case AMDILISD::ATOM_R_UMAX:
- return "AMDILISD::ATOM_R_UMAX";
- case AMDILISD::ATOM_R_MIN:
- return "AMDILISD::ATOM_R_MIN";
- case AMDILISD::ATOM_R_UMIN:
- return "AMDILISD::ATOM_R_UMIN";
- case AMDILISD::ATOM_R_OR:
- return "AMDILISD::ATOM_R_OR";
- case AMDILISD::ATOM_R_MSKOR:
- return "AMDILISD::ATOM_R_MSKOR";
- case AMDILISD::ATOM_R_SUB:
- return "AMDILISD::ATOM_R_SUB";
- case AMDILISD::ATOM_R_RSUB:
- return "AMDILISD::ATOM_R_RSUB";
- case AMDILISD::ATOM_R_XCHG:
- return "AMDILISD::ATOM_R_XCHG";
- case AMDILISD::ATOM_R_XOR:
- return "AMDILISD::ATOM_R_XOR";
- case AMDILISD::ATOM_R_ADD_NORET:
- return "AMDILISD::ATOM_R_ADD_NORET";
- case AMDILISD::ATOM_R_AND_NORET:
- return "AMDILISD::ATOM_R_AND_NORET";
- case AMDILISD::ATOM_R_CMPXCHG_NORET:
- return "AMDILISD::ATOM_R_CMPXCHG_NORET";
- case AMDILISD::ATOM_R_DEC_NORET:
- return "AMDILISD::ATOM_R_DEC_NORET";
- case AMDILISD::ATOM_R_INC_NORET:
- return "AMDILISD::ATOM_R_INC_NORET";
- case AMDILISD::ATOM_R_MAX_NORET:
- return "AMDILISD::ATOM_R_MAX_NORET";
- case AMDILISD::ATOM_R_UMAX_NORET:
- return "AMDILISD::ATOM_R_UMAX_NORET";
- case AMDILISD::ATOM_R_MIN_NORET:
- return "AMDILISD::ATOM_R_MIN_NORET";
- case AMDILISD::ATOM_R_UMIN_NORET:
- return "AMDILISD::ATOM_R_UMIN_NORET";
- case AMDILISD::ATOM_R_OR_NORET:
- return "AMDILISD::ATOM_R_OR_NORET";
- case AMDILISD::ATOM_R_MSKOR_NORET:
- return "AMDILISD::ATOM_R_MSKOR_NORET";
- case AMDILISD::ATOM_R_SUB_NORET:
- return "AMDILISD::ATOM_R_SUB_NORET";
- case AMDILISD::ATOM_R_RSUB_NORET:
- return "AMDILISD::ATOM_R_RSUB_NORET";
- case AMDILISD::ATOM_R_XCHG_NORET:
- return "AMDILISD::ATOM_R_XCHG_NORET";
- case AMDILISD::ATOM_R_XOR_NORET:
- return "AMDILISD::ATOM_R_XOR_NORET";
- case AMDILISD::APPEND_ALLOC:
- return "AMDILISD::APPEND_ALLOC";
- case AMDILISD::APPEND_CONSUME:
- return "AMDILISD::APPEND_CONSUME";
+ default: return 0;
+ case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
+ case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
+ case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
+ case ISD::SELECT: return "ISD::SELECT";
+ case AMDILISD::CALL: return "AMDILISD::CALL";
+ case AMDILISD::RET: return "AMDILISD::RET";
+ case AMDILISD::ADD: return "AMDILISD::ADD";
+ case AMDILISD::UMUL: return "AMDILISD::UMUL";
+ case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
+ case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
+ case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
+ case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
+ case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
+ case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
+ case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
+ case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
+ case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
+ case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
+ case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
+ case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
+ case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
+ case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
+ case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
+ case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
+ case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+ case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
+ case AMDILISD::ATOM_F_ADD: return "AMDILISD::ATOM_F_ADD";
+ case AMDILISD::ATOM_F_AND: return "AMDILISD::ATOM_F_AND";
+ case AMDILISD::ATOM_F_CMPXCHG: return "AMDILISD::ATOM_F_CMPXCHG";
+ case AMDILISD::ATOM_F_DEC: return "AMDILISD::ATOM_F_DEC";
+ case AMDILISD::ATOM_F_INC: return "AMDILISD::ATOM_F_INC";
+ case AMDILISD::ATOM_F_MAX: return "AMDILISD::ATOM_F_MAX";
+ case AMDILISD::ATOM_F_UMAX: return "AMDILISD::ATOM_F_UMAX";
+ case AMDILISD::ATOM_F_MIN: return "AMDILISD::ATOM_F_MIN";
+ case AMDILISD::ATOM_F_UMIN: return "AMDILISD::ATOM_F_UMIN";
+ case AMDILISD::ATOM_F_OR: return "AMDILISD::ATOM_F_OR";
+ case AMDILISD::ATOM_F_SUB: return "AMDILISD::ATOM_F_SUB";
+ case AMDILISD::ATOM_F_XCHG: return "AMDILISD::ATOM_F_XCHG";
+ case AMDILISD::ATOM_F_XOR: return "AMDILISD::ATOM_F_XOR";
+ case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
+ case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
+ case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
+ case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
+ case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
+ case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
+ case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
+ case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
+ case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
+ case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
+ case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
+ case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
+ case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
+ case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
+ case AMDILISD::ATOM_G_STORE: return "AMDILISD::ATOM_G_STORE";
+ case AMDILISD::ATOM_G_LOAD: return "AMDILISD::ATOM_G_LOAD";
+ case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
+ case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
+ case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
+ case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
+ case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
+ case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
+ case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
+ case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
+ case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
+ case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
+ case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
+ case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
+ case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
+ case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
+ case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
+ case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
+ case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
+ case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
+ case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
+ case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
+ case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
+ case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
+ case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
+ case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
+ case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
+ case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
+ case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
+ case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
+ case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
+ case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
+ case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
+ case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
+ case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
+ case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
+ case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
+ case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
+ case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
+ case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
+ case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
+ case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
+ case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
+ case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
+ case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
+ case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
+ case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
+ case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
+ case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
+ case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
+ case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
+ case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
+ case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
+ case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
+ case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
+ case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
+ case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
+ case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
+ case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
+ case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
+ case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
+ case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
+ case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
+ case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
+ case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
+ case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
+ case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
+ case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
+ case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
+ case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
+ case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
+ case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
+ case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
+ case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
+ case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
};
}
-
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
EVT AMDILTargetLowering::getSetCCResultType(EVT VT) const
{
- if (!VT.isVector())
- return MVT::i32;
- return VT.changeVectorElementTypeToInteger();
+ if (VT == MVT::Other) return MVT::i32;
+ if (!VT.isVector()) {
+ return VT.getSizeInBits() <= 32 ? MVT::i32 : MVT::i64;
+ }
+ return MVT::getVectorVT(
+ (VT.getScalarType().getSizeInBits() == 64) ? MVT::i64 : MVT::i32,
+ VT.getVectorNumElements());
}
-
-
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const
+ const CallInst &I,
+ unsigned Intrinsic) const
{
if (Intrinsic <= AMDILIntrinsic::last_non_AMDIL_intrinsic
|| Intrinsic > AMDILIntrinsic::num_AMDIL_intrinsics) {
@@ -1723,488 +607,409 @@
}
bool bitCastToInt = false;
unsigned IntNo;
+ bool isStore = true;
bool isRet = true;
const AMDILSubtarget *STM = &this->getTargetMachine()
.getSubtarget<AMDILSubtarget>();
switch (Intrinsic) {
- default:
- return false; // Don't custom lower most intrinsics.
+ default: return false; // Don't custom lower most intrinsics.
case AMDILIntrinsic::AMDIL_atomic_add_gi32:
case AMDILIntrinsic::AMDIL_atomic_add_gu32:
case AMDILIntrinsic::AMDIL_atomic_add_gi64:
case AMDILIntrinsic::AMDIL_atomic_add_gu64:
- IntNo = AMDILISD::ATOM_G_ADD;
- break;
+ IntNo = AMDILISD::ATOM_G_ADD; break;
case AMDILIntrinsic::AMDIL_atomic_add_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_add_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_add_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_add_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_ADD_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_add_lu32:
case AMDILIntrinsic::AMDIL_atomic_add_li32:
case AMDILIntrinsic::AMDIL_atomic_add_lu64:
case AMDILIntrinsic::AMDIL_atomic_add_li64:
- IntNo = AMDILISD::ATOM_L_ADD;
- break;
+ IntNo = AMDILISD::ATOM_L_ADD; break;
case AMDILIntrinsic::AMDIL_atomic_add_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_add_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_add_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_add_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_ADD_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_add_ru32:
case AMDILIntrinsic::AMDIL_atomic_add_ri32:
case AMDILIntrinsic::AMDIL_atomic_add_ru64:
case AMDILIntrinsic::AMDIL_atomic_add_ri64:
- IntNo = AMDILISD::ATOM_R_ADD;
- break;
+ IntNo = AMDILISD::ATOM_R_ADD; break;
case AMDILIntrinsic::AMDIL_atomic_add_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_add_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_add_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_add_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_ADD_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_and_gi32:
case AMDILIntrinsic::AMDIL_atomic_and_gu32:
case AMDILIntrinsic::AMDIL_atomic_and_gi64:
case AMDILIntrinsic::AMDIL_atomic_and_gu64:
- IntNo = AMDILISD::ATOM_G_AND;
- break;
+ IntNo = AMDILISD::ATOM_G_AND; break;
case AMDILIntrinsic::AMDIL_atomic_and_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_and_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_and_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_and_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_AND_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_AND_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_and_li32:
case AMDILIntrinsic::AMDIL_atomic_and_lu32:
case AMDILIntrinsic::AMDIL_atomic_and_li64:
case AMDILIntrinsic::AMDIL_atomic_and_lu64:
- IntNo = AMDILISD::ATOM_L_AND;
- break;
+ IntNo = AMDILISD::ATOM_L_AND; break;
case AMDILIntrinsic::AMDIL_atomic_and_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_and_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_and_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_and_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_AND_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_AND_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_and_ri32:
case AMDILIntrinsic::AMDIL_atomic_and_ru32:
case AMDILIntrinsic::AMDIL_atomic_and_ri64:
case AMDILIntrinsic::AMDIL_atomic_and_ru64:
- IntNo = AMDILISD::ATOM_R_AND;
- break;
+ IntNo = AMDILISD::ATOM_R_AND; break;
case AMDILIntrinsic::AMDIL_atomic_and_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_and_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_and_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_and_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_AND_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_AND_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi64:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu64:
- IntNo = AMDILISD::ATOM_G_CMPXCHG;
- break;
+ IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li64:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu64:
- IntNo = AMDILISD::ATOM_L_CMPXCHG;
- break;
+ IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri64:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru64:
- IntNo = AMDILISD::ATOM_R_CMPXCHG;
- break;
+ IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_cmpxchg_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_dec_gi32:
case AMDILIntrinsic::AMDIL_atomic_dec_gu32:
case AMDILIntrinsic::AMDIL_atomic_dec_gi64:
case AMDILIntrinsic::AMDIL_atomic_dec_gu64:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_DEC;
- } else {
- IntNo = AMDILISD::ATOM_G_SUB;
- }
+ IntNo = AMDILISD::ATOM_G_DEC;
break;
case AMDILIntrinsic::AMDIL_atomic_dec_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_gu64_noret:
isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_DEC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_G_SUB_NORET;
- }
+ IntNo = AMDILISD::ATOM_G_DEC_NORET;
break;
case AMDILIntrinsic::AMDIL_atomic_dec_li32:
case AMDILIntrinsic::AMDIL_atomic_dec_lu32:
case AMDILIntrinsic::AMDIL_atomic_dec_li64:
case AMDILIntrinsic::AMDIL_atomic_dec_lu64:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_DEC;
- } else {
- IntNo = AMDILISD::ATOM_L_SUB;
- }
+ IntNo = AMDILISD::ATOM_L_DEC;
break;
case AMDILIntrinsic::AMDIL_atomic_dec_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_lu64_noret:
isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_DEC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_L_SUB_NORET;
- }
+ IntNo = AMDILISD::ATOM_L_DEC_NORET;
break;
case AMDILIntrinsic::AMDIL_atomic_dec_ri32:
case AMDILIntrinsic::AMDIL_atomic_dec_ru32:
case AMDILIntrinsic::AMDIL_atomic_dec_ri64:
case AMDILIntrinsic::AMDIL_atomic_dec_ru64:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_DEC;
- } else {
- IntNo = AMDILISD::ATOM_R_SUB;
- }
+ IntNo = AMDILISD::ATOM_R_DEC;
break;
case AMDILIntrinsic::AMDIL_atomic_dec_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_dec_ru64_noret:
isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_DEC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_R_SUB_NORET;
- }
+ IntNo = AMDILISD::ATOM_R_DEC_NORET;
break;
case AMDILIntrinsic::AMDIL_atomic_inc_gi32:
case AMDILIntrinsic::AMDIL_atomic_inc_gu32:
case AMDILIntrinsic::AMDIL_atomic_inc_gi64:
case AMDILIntrinsic::AMDIL_atomic_inc_gu64:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_INC;
- } else {
- IntNo = AMDILISD::ATOM_G_ADD;
- }
+ IntNo = AMDILISD::ATOM_G_INC;
break;
case AMDILIntrinsic::AMDIL_atomic_inc_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_gu64_noret:
isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_G_INC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_G_ADD_NORET;
- }
+ IntNo = AMDILISD::ATOM_G_INC_NORET;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_store_gv4u32:
+ case AMDILIntrinsic::AMDIL_atomic_store_gv4i32:
+ case AMDILIntrinsic::AMDIL_atomic_store_gv2u32:
+ case AMDILIntrinsic::AMDIL_atomic_store_gv2i32:
+ case AMDILIntrinsic::AMDIL_atomic_store_gu64:
+ case AMDILIntrinsic::AMDIL_atomic_store_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_store_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_store_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_store_gu16:
+ case AMDILIntrinsic::AMDIL_atomic_store_gi16:
+ case AMDILIntrinsic::AMDIL_atomic_store_gu8:
+ case AMDILIntrinsic::AMDIL_atomic_store_gi8:
+ isRet = false;
+ IntNo = AMDILISD::ATOM_G_STORE;
+ break;
+ case AMDILIntrinsic::AMDIL_atomic_load_gv4u32:
+ case AMDILIntrinsic::AMDIL_atomic_load_gv4i32:
+ case AMDILIntrinsic::AMDIL_atomic_load_gv2u32:
+ case AMDILIntrinsic::AMDIL_atomic_load_gv2i32:
+ case AMDILIntrinsic::AMDIL_atomic_load_gu64:
+ case AMDILIntrinsic::AMDIL_atomic_load_gi64:
+ case AMDILIntrinsic::AMDIL_atomic_load_gu32:
+ case AMDILIntrinsic::AMDIL_atomic_load_gi32:
+ case AMDILIntrinsic::AMDIL_atomic_load_gu16:
+ case AMDILIntrinsic::AMDIL_atomic_load_gi16:
+ case AMDILIntrinsic::AMDIL_atomic_load_gu8:
+ case AMDILIntrinsic::AMDIL_atomic_load_gi8:
+ IntNo = AMDILISD::ATOM_G_LOAD;
+ isStore = false;
break;
case AMDILIntrinsic::AMDIL_atomic_inc_li32:
case AMDILIntrinsic::AMDIL_atomic_inc_lu32:
case AMDILIntrinsic::AMDIL_atomic_inc_li64:
case AMDILIntrinsic::AMDIL_atomic_inc_lu64:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_INC;
- } else {
- IntNo = AMDILISD::ATOM_L_ADD;
- }
+ IntNo = AMDILISD::ATOM_L_INC;
break;
case AMDILIntrinsic::AMDIL_atomic_inc_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_lu64_noret:
isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_L_INC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_L_ADD_NORET;
- }
+ IntNo = AMDILISD::ATOM_L_INC_NORET;
break;
case AMDILIntrinsic::AMDIL_atomic_inc_ri32:
case AMDILIntrinsic::AMDIL_atomic_inc_ru32:
case AMDILIntrinsic::AMDIL_atomic_inc_ri64:
case AMDILIntrinsic::AMDIL_atomic_inc_ru64:
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_INC;
- } else {
- IntNo = AMDILISD::ATOM_R_ADD;
- }
+ IntNo = AMDILISD::ATOM_R_INC;
break;
case AMDILIntrinsic::AMDIL_atomic_inc_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_inc_ru64_noret:
isRet = false;
- if (STM->calVersion() >= CAL_VERSION_SC_136) {
- IntNo = AMDILISD::ATOM_R_INC_NORET;
- } else {
- IntNo = AMDILISD::ATOM_R_ADD_NORET;
- }
+ IntNo = AMDILISD::ATOM_R_INC_NORET;
break;
case AMDILIntrinsic::AMDIL_atomic_max_gi32:
case AMDILIntrinsic::AMDIL_atomic_max_gi64:
- IntNo = AMDILISD::ATOM_G_MAX;
- break;
+ IntNo = AMDILISD::ATOM_G_MAX; break;
case AMDILIntrinsic::AMDIL_atomic_max_gu32:
case AMDILIntrinsic::AMDIL_atomic_max_gu64:
- IntNo = AMDILISD::ATOM_G_UMAX;
- break;
+ IntNo = AMDILISD::ATOM_G_UMAX; break;
case AMDILIntrinsic::AMDIL_atomic_max_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_max_gi64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_MAX_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_max_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_max_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_UMAX_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_max_li32:
case AMDILIntrinsic::AMDIL_atomic_max_li64:
- IntNo = AMDILISD::ATOM_L_MAX;
- break;
+ IntNo = AMDILISD::ATOM_L_MAX; break;
case AMDILIntrinsic::AMDIL_atomic_max_lu32:
case AMDILIntrinsic::AMDIL_atomic_max_lu64:
- IntNo = AMDILISD::ATOM_L_UMAX;
- break;
+ IntNo = AMDILISD::ATOM_L_UMAX; break;
case AMDILIntrinsic::AMDIL_atomic_max_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_max_li64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_MAX_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_max_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_max_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_UMAX_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_max_ri32:
case AMDILIntrinsic::AMDIL_atomic_max_ri64:
- IntNo = AMDILISD::ATOM_R_MAX;
- break;
+ IntNo = AMDILISD::ATOM_R_MAX; break;
case AMDILIntrinsic::AMDIL_atomic_max_ru32:
case AMDILIntrinsic::AMDIL_atomic_max_ru64:
- IntNo = AMDILISD::ATOM_R_UMAX;
- break;
+ IntNo = AMDILISD::ATOM_R_UMAX; break;
case AMDILIntrinsic::AMDIL_atomic_max_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_max_ri64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_MAX_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_max_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_max_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_UMAX_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_min_gi32:
case AMDILIntrinsic::AMDIL_atomic_min_gi64:
- IntNo = AMDILISD::ATOM_G_MIN;
- break;
+ IntNo = AMDILISD::ATOM_G_MIN; break;
case AMDILIntrinsic::AMDIL_atomic_min_gu32:
case AMDILIntrinsic::AMDIL_atomic_min_gu64:
- IntNo = AMDILISD::ATOM_G_UMIN;
- break;
+ IntNo = AMDILISD::ATOM_G_UMIN; break;
case AMDILIntrinsic::AMDIL_atomic_min_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_min_gi64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_MIN_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_min_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_min_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_UMIN_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_min_li32:
case AMDILIntrinsic::AMDIL_atomic_min_li64:
- IntNo = AMDILISD::ATOM_L_MIN;
- break;
+ IntNo = AMDILISD::ATOM_L_MIN; break;
case AMDILIntrinsic::AMDIL_atomic_min_lu32:
case AMDILIntrinsic::AMDIL_atomic_min_lu64:
- IntNo = AMDILISD::ATOM_L_UMIN;
- break;
+ IntNo = AMDILISD::ATOM_L_UMIN; break;
case AMDILIntrinsic::AMDIL_atomic_min_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_min_li64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_MIN_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_min_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_min_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_UMIN_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_min_ri32:
case AMDILIntrinsic::AMDIL_atomic_min_ri64:
- IntNo = AMDILISD::ATOM_R_MIN;
- break;
+ IntNo = AMDILISD::ATOM_R_MIN; break;
case AMDILIntrinsic::AMDIL_atomic_min_ru32:
case AMDILIntrinsic::AMDIL_atomic_min_ru64:
- IntNo = AMDILISD::ATOM_R_UMIN;
- break;
+ IntNo = AMDILISD::ATOM_R_UMIN; break;
case AMDILIntrinsic::AMDIL_atomic_min_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_min_ri64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_MIN_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_min_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_min_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_UMIN_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_or_gi32:
case AMDILIntrinsic::AMDIL_atomic_or_gu32:
case AMDILIntrinsic::AMDIL_atomic_or_gi64:
case AMDILIntrinsic::AMDIL_atomic_or_gu64:
- IntNo = AMDILISD::ATOM_G_OR;
- break;
+ IntNo = AMDILISD::ATOM_G_OR; break;
case AMDILIntrinsic::AMDIL_atomic_or_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_or_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_or_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_or_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_OR_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_OR_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_or_li32:
case AMDILIntrinsic::AMDIL_atomic_or_lu32:
case AMDILIntrinsic::AMDIL_atomic_or_li64:
case AMDILIntrinsic::AMDIL_atomic_or_lu64:
- IntNo = AMDILISD::ATOM_L_OR;
- break;
+ IntNo = AMDILISD::ATOM_L_OR; break;
case AMDILIntrinsic::AMDIL_atomic_or_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_or_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_or_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_or_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_OR_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_OR_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_or_ri32:
case AMDILIntrinsic::AMDIL_atomic_or_ru32:
case AMDILIntrinsic::AMDIL_atomic_or_ri64:
case AMDILIntrinsic::AMDIL_atomic_or_ru64:
- IntNo = AMDILISD::ATOM_R_OR;
- break;
+ IntNo = AMDILISD::ATOM_R_OR; break;
case AMDILIntrinsic::AMDIL_atomic_or_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_or_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_or_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_or_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_OR_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_OR_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_sub_gi32:
case AMDILIntrinsic::AMDIL_atomic_sub_gu32:
case AMDILIntrinsic::AMDIL_atomic_sub_gi64:
case AMDILIntrinsic::AMDIL_atomic_sub_gu64:
- IntNo = AMDILISD::ATOM_G_SUB;
- break;
+ IntNo = AMDILISD::ATOM_G_SUB; break;
case AMDILIntrinsic::AMDIL_atomic_sub_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_SUB_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_sub_li32:
case AMDILIntrinsic::AMDIL_atomic_sub_lu32:
case AMDILIntrinsic::AMDIL_atomic_sub_li64:
case AMDILIntrinsic::AMDIL_atomic_sub_lu64:
- IntNo = AMDILISD::ATOM_L_SUB;
- break;
+ IntNo = AMDILISD::ATOM_L_SUB; break;
case AMDILIntrinsic::AMDIL_atomic_sub_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_SUB_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_sub_ri32:
case AMDILIntrinsic::AMDIL_atomic_sub_ru32:
case AMDILIntrinsic::AMDIL_atomic_sub_ri64:
case AMDILIntrinsic::AMDIL_atomic_sub_ru64:
- IntNo = AMDILISD::ATOM_R_SUB;
- break;
+ IntNo = AMDILISD::ATOM_R_SUB; break;
case AMDILIntrinsic::AMDIL_atomic_sub_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_sub_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_SUB_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_rsub_gi32:
case AMDILIntrinsic::AMDIL_atomic_rsub_gu32:
case AMDILIntrinsic::AMDIL_atomic_rsub_gi64:
case AMDILIntrinsic::AMDIL_atomic_rsub_gu64:
- IntNo = AMDILISD::ATOM_G_RSUB;
- break;
+ IntNo = AMDILISD::ATOM_G_RSUB; break;
case AMDILIntrinsic::AMDIL_atomic_rsub_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_RSUB_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_rsub_li32:
case AMDILIntrinsic::AMDIL_atomic_rsub_lu32:
case AMDILIntrinsic::AMDIL_atomic_rsub_li64:
case AMDILIntrinsic::AMDIL_atomic_rsub_lu64:
- IntNo = AMDILISD::ATOM_L_RSUB;
- break;
+ IntNo = AMDILISD::ATOM_L_RSUB; break;
case AMDILIntrinsic::AMDIL_atomic_rsub_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_RSUB_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_rsub_ri32:
case AMDILIntrinsic::AMDIL_atomic_rsub_ru32:
case AMDILIntrinsic::AMDIL_atomic_rsub_ri64:
case AMDILIntrinsic::AMDIL_atomic_rsub_ru64:
- IntNo = AMDILISD::ATOM_R_RSUB;
- break;
+ IntNo = AMDILISD::ATOM_R_RSUB; break;
case AMDILIntrinsic::AMDIL_atomic_rsub_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_rsub_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_RSUB_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_xchg_gf32:
bitCastToInt = true;
case AMDILIntrinsic::AMDIL_atomic_xchg_gi32:
case AMDILIntrinsic::AMDIL_atomic_xchg_gu32:
case AMDILIntrinsic::AMDIL_atomic_xchg_gi64:
case AMDILIntrinsic::AMDIL_atomic_xchg_gu64:
- IntNo = AMDILISD::ATOM_G_XCHG;
- break;
+ IntNo = AMDILISD::ATOM_G_XCHG; break;
case AMDILIntrinsic::AMDIL_atomic_xchg_gf32_noret:
bitCastToInt = true;
case AMDILIntrinsic::AMDIL_atomic_xchg_gi32_noret:
@@ -2212,16 +1017,14 @@
case AMDILIntrinsic::AMDIL_atomic_xchg_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_xchg_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_XCHG_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_xchg_lf32:
bitCastToInt = true;
case AMDILIntrinsic::AMDIL_atomic_xchg_li32:
case AMDILIntrinsic::AMDIL_atomic_xchg_lu32:
case AMDILIntrinsic::AMDIL_atomic_xchg_li64:
case AMDILIntrinsic::AMDIL_atomic_xchg_lu64:
- IntNo = AMDILISD::ATOM_L_XCHG;
- break;
+ IntNo = AMDILISD::ATOM_L_XCHG; break;
case AMDILIntrinsic::AMDIL_atomic_xchg_lf32_noret:
bitCastToInt = true;
case AMDILIntrinsic::AMDIL_atomic_xchg_li32_noret:
@@ -2229,16 +1032,14 @@
case AMDILIntrinsic::AMDIL_atomic_xchg_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_xchg_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_XCHG_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_xchg_rf32:
bitCastToInt = true;
case AMDILIntrinsic::AMDIL_atomic_xchg_ri32:
case AMDILIntrinsic::AMDIL_atomic_xchg_ru32:
case AMDILIntrinsic::AMDIL_atomic_xchg_ri64:
case AMDILIntrinsic::AMDIL_atomic_xchg_ru64:
- IntNo = AMDILISD::ATOM_R_XCHG;
- break;
+ IntNo = AMDILISD::ATOM_R_XCHG; break;
case AMDILIntrinsic::AMDIL_atomic_xchg_rf32_noret:
bitCastToInt = true;
case AMDILIntrinsic::AMDIL_atomic_xchg_ri32_noret:
@@ -2246,58 +1047,49 @@
case AMDILIntrinsic::AMDIL_atomic_xchg_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_xchg_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_XCHG_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_xor_gi32:
case AMDILIntrinsic::AMDIL_atomic_xor_gu32:
case AMDILIntrinsic::AMDIL_atomic_xor_gi64:
case AMDILIntrinsic::AMDIL_atomic_xor_gu64:
- IntNo = AMDILISD::ATOM_G_XOR;
- break;
+ IntNo = AMDILISD::ATOM_G_XOR; break;
case AMDILIntrinsic::AMDIL_atomic_xor_gi32_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_gu32_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_gi64_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_gu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_G_XOR_NORET;
- break;
+ IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_xor_li32:
case AMDILIntrinsic::AMDIL_atomic_xor_lu32:
case AMDILIntrinsic::AMDIL_atomic_xor_li64:
case AMDILIntrinsic::AMDIL_atomic_xor_lu64:
- IntNo = AMDILISD::ATOM_L_XOR;
- break;
+ IntNo = AMDILISD::ATOM_L_XOR; break;
case AMDILIntrinsic::AMDIL_atomic_xor_li32_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_lu32_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_li64_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_lu64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_L_XOR_NORET;
- break;
+ IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
case AMDILIntrinsic::AMDIL_atomic_xor_ri32:
case AMDILIntrinsic::AMDIL_atomic_xor_ru32:
case AMDILIntrinsic::AMDIL_atomic_xor_ri64:
case AMDILIntrinsic::AMDIL_atomic_xor_ru64:
- IntNo = AMDILISD::ATOM_R_XOR;
- break;
+ IntNo = AMDILISD::ATOM_R_XOR; break;
case AMDILIntrinsic::AMDIL_atomic_xor_ri32_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_ru32_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_ri64_noret:
case AMDILIntrinsic::AMDIL_atomic_xor_ru64_noret:
isRet = false;
- IntNo = AMDILISD::ATOM_R_XOR_NORET;
- break;
+ IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
case AMDILIntrinsic::AMDIL_append_alloc_i32:
- IntNo = AMDILISD::APPEND_ALLOC;
- break;
+ IntNo = AMDILISD::APPEND_ALLOC; break;
case AMDILIntrinsic::AMDIL_append_consume_i32:
- IntNo = AMDILISD::APPEND_CONSUME;
- break;
+ IntNo = AMDILISD::APPEND_CONSUME; break;
};
const AMDILSubtarget *stm = &this->getTargetMachine()
.getSubtarget<AMDILSubtarget>();
AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
- stm->getKernelManager());
+ stm->getKernelManager());
KM->setOutputInst();
Info.opc = IntNo;
@@ -2307,7 +1099,7 @@
Info.align = 4;
Info.vol = true;
Info.readMem = isRet;
- Info.writeMem = true;
+ Info.writeMem = isStore;
return true;
}
// The backend supports 32 and 64 bit floating point immediates
@@ -2321,7 +1113,6 @@
return false;
}
}
-
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
@@ -2332,8 +1123,6 @@
return true;
}
}
-
-
// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.
@@ -2351,20 +1140,20 @@
unsigned BitWidth = KnownZero.getBitWidth();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything
switch (Op.getOpcode()) {
- default:
- break;
+ default: break;
+ case ISD::SELECT_CC:
case AMDILISD::SELECT_CC:
DAG.ComputeMaskedBits(
Op.getOperand(1),
KnownZero,
KnownOne,
Depth + 1
- );
+ );
DAG.ComputeMaskedBits(
Op.getOperand(0),
KnownZero2,
KnownOne2
- );
+ );
assert((KnownZero & KnownOne) == 0
&& "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0
@@ -2375,7 +1164,6 @@
break;
};
}
-
// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention
CCAssignFn*
@@ -2384,7 +1172,6 @@
//uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
return CC_AMDIL32;
}
-
// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
@@ -2412,12 +1199,12 @@
EVT CopyVT = RVLocs[i].getValVT();
if (RVLocs[i].isRegLoc()) {
Chain = DAG.getCopyFromReg(
- Chain,
- dl,
- RVLocs[i].getLocReg(),
- CopyVT,
- InFlag
- ).getValue(1);
+ Chain,
+ dl,
+ RVLocs[i].getLocReg(),
+ CopyVT,
+ InFlag
+ ).getValue(1);
SDValue Val = Chain.getValue(0);
InFlag = Chain.getValue(2);
InVals.push_back(Val);
@@ -2425,29 +1212,11 @@
}
return Chain;
-
}
-
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
-MachineBasicBlock *
-AMDILTargetLowering::EmitInstrWithCustomInserter(
- MachineInstr *MI, MachineBasicBlock *BB) const
-{
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
- switch (MI->getOpcode()) {
- ExpandCaseToAllTypes(AMDIL::CMP);
- generateCMPInstr(MI, BB, TII);
- MI->eraseFromParent();
- break;
- default:
- break;
- }
- return BB;
-}
-
// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
@@ -2461,10 +1230,9 @@
}
return New;
}
-
#define LOWER(A) \
- case ISD:: A: \
-return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+case ISD:: A: \
+ return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower ## A(Op, DAG) )
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
@@ -2497,11 +1265,9 @@
LOWER(EXTRACT_SUBVECTOR);
LOWER(SCALAR_TO_VECTOR);
LOWER(CONCAT_VECTORS);
- LOWER(AND);
- LOWER(OR);
+ LOWER(SETCC);
LOWER(SELECT);
LOWER(SELECT_CC);
- LOWER(SETCC);
LOWER(SIGN_EXTEND_INREG);
LOWER(BITCAST);
LOWER(DYNAMIC_STACKALLOC);
@@ -2511,7 +1277,6 @@
}
return Op;
}
-
int
AMDILTargetLowering::getVarArgsFrameOffset() const
{
@@ -2534,9 +1299,22 @@
int32_t arrayoffset = AMI->getArrayOffset(G->getName().str());
int32_t constoffset = AMI->getConstOffset(G->getName().str());
if (arrayoffset != -1) {
- DST = DAG.getConstant(arrayoffset, PtrVT);
- DST = DAG.getNode(ISD::ADD, DL, PtrVT,
- DST, DAG.getConstant(base_offset, PtrVT));
+ // We will do per-pointer local buffer allocation.
+ // Here we temporarily use an addri node to represent the address
+ // of the local array. It will be replaced in AMDILPointerManager
+ // when we figure out which local pointer is allocated in its own buffer.
+ const AMDILSubtarget *stm = &this->getTargetMachine()
+ .getSubtarget<AMDILSubtarget>();
+ if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+ && G->getType()->getAddressSpace() == AMDILAS::LOCAL_ADDRESS) {
+ SDValue addr = DAG.getTargetGlobalAddress(G, DL, PtrVT);
+ DST = DAG.getConstant(base_offset, PtrVT);
+ DST = DAG.getNode(AMDILISD::ADDADDR, DL, PtrVT, addr, DST);
+ } else {
+ DST = DAG.getConstant(arrayoffset, PtrVT);
+ DST = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DST, DAG.getConstant(base_offset, PtrVT));
+ }
} else if (constoffset != -1) {
if (AMI->getConstHWBit(G->getName().str())) {
DST = DAG.getConstant(constoffset, PtrVT);
@@ -2558,7 +1336,6 @@
const Constant *C = dyn_cast<Constant>(GV->getInitializer());
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
DST = DAG.getConstant(CI->getValue(), Op.getValueType());
-
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
DST = DAG.getConstantFP(CF->getValueAPF(),
Op.getValueType());
@@ -2582,7 +1359,6 @@
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
@@ -2598,14 +1374,15 @@
SDValue Result;
if (CP->isMachineConstantPoolEntry()) {
Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ CP->getAlignment(),
+ CP->getOffset(), CP->getTargetFlags());
} else {
Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ CP->getAlignment(),
+ CP->getOffset(), CP->getTargetFlags());
}
return Result;
}
-
SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
@@ -2618,18 +1395,20 @@
/// arguments places on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
-AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
+AMDILTargetLowering::LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
const
{
MachineFunction &MF = DAG.getMachineFunction();
AMDILMachineFunctionInfo *FuncInfo
- = MF.getInfo<AMDILMachineFunctionInfo>();
+ = MF.getInfo<AMDILMachineFunctionInfo>();
MachineFrameInfo *MFI = MF.getFrameInfo();
//const Function *Fn = MF.getFunction();
//MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -2653,40 +1432,40 @@
EVT RegVT = VA.getLocVT();
EVT ValVT = VA.getValVT();
const TargetRegisterClass *RC = getRegClassFromType(
- RegVT.getSimpleVT().SimpleTy);
+ RegVT.getSimpleVT().SimpleTy);
unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
FuncInfo->addArgReg(VA.getLocReg());
SDValue ArgValue = DAG.getCopyFromReg(
- Chain,
- dl,
- Reg,
- RegVT);
+ Chain,
+ dl,
+ Reg,
+ RegVT);
// If this is an 8 or 16-bit value, it is really passed
// promoted to 32 bits. Insert an assert[sz]ext to capture
// this, then truncate to the right size.
if (VA.getLocInfo() == CCValAssign::SExt) {
ArgValue = DAG.getNode(
- ISD::AssertSext,
- dl,
- RegVT,
- ArgValue,
- DAG.getValueType(ValVT));
+ ISD::AssertSext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(ValVT));
} else if (VA.getLocInfo() == CCValAssign::ZExt) {
ArgValue = DAG.getNode(
- ISD::AssertZext,
- dl,
- RegVT,
- ArgValue,
- DAG.getValueType(ValVT));
+ ISD::AssertZext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(ValVT));
}
if (VA.getLocInfo() != CCValAssign::Full) {
ArgValue = DAG.getNode(
- ISD::TRUNCATE,
- dl,
- ValVT,
- ArgValue);
+ ISD::TRUNCATE,
+ dl,
+ ValVT,
+ ArgValue);
}
// Add the value to the list of arguments
// to be passed in registers
@@ -2724,18 +1503,16 @@
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
- ISD::ArgFlagsTy Flags, SelectionDAG &DAG)
-{
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
assert(0 && "MemCopy does not exist yet");
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain,
Src.getDebugLoc(),
Dst, Src, SizeNode, Flags.getByValAlign(),
- /*IsVol=*/false, /*AlwaysInline=*/true,
+ /*IsVol=*/ false, /*AlwaysInline=*/ true,
MachinePointerInfo(), MachinePointerInfo());
}
-
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue StackPtr, SDValue Arg,
@@ -2762,12 +1539,17 @@
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
-AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool doesNotReturn, bool isVarArg, bool& isTailCall,
+AMDILTargetLowering::LowerCall(SDValue Chain,
+ SDValue Callee,
+ CallingConv::ID CallConv,
+ bool doesNotReturn,
+ bool isVarArg,
+ bool& isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ DebugLoc dl,
+ SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const
{
@@ -2810,8 +1592,7 @@
SDValue Arg = OutVals[i];
//Promote the value if needed
switch(VA.getLocInfo()) {
- default:
- assert(0 && "Unknown loc info!");
+ default: assert(0 && "Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
@@ -2837,7 +1618,7 @@
// Create the frame index object for this incoming parameter
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
VA.getLocMemOffset(), true
- );
+ );
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
// emit ISD::STORE whichs stores the
@@ -2874,9 +1655,11 @@
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
- } else if (isTailCall) {
+ }
+ else if (isTailCall) {
assert(0 && "Tail calls are not handled yet");
// see X86 ISelLowering for ideas on implementation: 1708
}
@@ -2923,10 +1706,10 @@
// Create the CALLSEQ_END node
Chain = DAG.getCALLSEQ_END(
- Chain,
- DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true),
- InFlag);
+ Chain,
+ DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true),
+ InFlag);
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that
// we return
@@ -2992,7 +1775,6 @@
is24bitMAD = true;
}
}
-
SDValue
AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
{
@@ -3018,20 +1800,32 @@
} else {
SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
// TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
- LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
- RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
- LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
- RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+ LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ LHS);
+ RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ RHS);
+ LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ LHS);
+ RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ RHS);
INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
SDValue cmp;
- cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- INTLO, RHSLO);
- cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
- INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
- DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
- INTLO, INTHI);
+ cmp = DAG.getSetCC(DL, INTTY, INTLO, RHSLO, ISD::SETULT);
+ INTHI = DAG.getNode(ISD::SUB, DL, INTTY, INTHI, cmp);
+ DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ OVT,
+ INTLO,
+ INTHI);
}
} else {
if (LHS.getOpcode() == ISD::FrameIndex ||
@@ -3067,24 +1861,26 @@
} else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
Op1 = LHS.getOperand(0);
Op2 = DAG.getConstant(
- 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
+ 1 << LHSConstOpCode->getZExtValue(), MVT::i32);
Op3 = RHS;
} else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
Op1 = RHS.getOperand(0);
Op2 = DAG.getConstant(
- 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
+ 1 << RHSConstOpCode->getZExtValue(), MVT::i32);
Op3 = LHS;
}
checkMADType(Op, stm, is24bitMAD, is32bitMAD);
// We can possibly do a MAD transform!
- if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
+ if (is24bitMAD &&
+ stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
uint32_t opcode = AMDILIntrinsic::AMDIL_mad24_i32;
- SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+ SDVTList Tys = DAG.getVTList(OVT /*, MVT::Other*/);
DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
- DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
+ DL, Tys, DAG.getEntryNode(),
+ DAG.getConstant(opcode, MVT::i32),
Op1, Op2, Op3);
} else if(is32bitMAD) {
- SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
+ SDVTList Tys = DAG.getVTList(OVT /*, MVT::Other*/);
DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
DL, Tys, DAG.getEntryNode(),
DAG.getConstant(
@@ -3143,9 +1939,8 @@
// uint tint = as_uint(t)
SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
// int cmp = (x != 0)
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
- DAG.getConstant(0, INTTY));
+ SDValue cmp = DAG.getSetCC(DL, INTTY, x, DAG.getConstant(0,
+ INTTY), ISD::SETNE);
// uint tsrc = tint >> 23
SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
DAG.getConstant(23, INTTY));
@@ -3156,11 +1951,10 @@
SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
DAG.getConstant((103U + bits), INTTY), tmask);
// return cmp ? cst : N
- cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
- DAG.getConstant(bits, INTTY));
+ cst = DAG.getSelect(DL, INTTY, cmp, cst,
+ DAG.getConstant(bits, INTTY));
return cst;
}
-
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
@@ -3168,7 +1962,7 @@
DebugLoc DL = Op.getDebugLoc();
EVT INTTY = Op.getValueType();
const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ &this->getTargetMachine())->getSubtargetImpl();
if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
//__clz_32bit(uint u)
//{
@@ -3178,14 +1972,17 @@
// uint u = op
SDValue u = Op;
// int z = __amdil_ffb_hi(u)
- SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
+ SDValue z = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, INTTY,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::
+ AMDIL_bit_find_first_hi, MVT::i32),
+ u);
// int cmp = z < 0
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- z, DAG.getConstant(0, INTTY));
+ SDValue cmp = DAG.getSetCC(DL, INTTY, z, DAG.getConstant(0,
+ INTTY), ISD::SETLT);
// return cmp ? 32 : z
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
- DAG.getConstant(32, INTTY), z);
+ DST = DAG.getSelect(DL, INTTY, cmp,
+ DAG.getConstant(32, INTTY), z);
} else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
// static inline uint
//__clz_32bit(uint x)
@@ -3207,15 +2004,15 @@
// uint zl = __clz_16bit(xa16)
SDValue zl = genCLZuN(xa16, DAG, 16);
// uint cmp = zh == 16U
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zh, DAG.getConstant(16U, INTTY));
+ SDValue cmp = DAG.getSetCC(DL, INTTY, zh, DAG.getConstant(16U,
+ INTTY),
+ ISD::SETEQ);
// uint zl16 = zl + 16
SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
DAG.getConstant(16, INTTY), zl);
// return cmp ? zl16 : zh
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
- cmp, zl16, zh);
+ DST = DAG.getSelect(DL, INTTY,
+ cmp, zl16, zh);
} else {
assert(0 && "Attempting to generate a CLZ function with an"
" unknown graphics card");
@@ -3237,7 +2034,7 @@
INTTY = EVT(MVT::i32);
}
const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ &this->getTargetMachine())->getSubtargetImpl();
if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
// Evergreen:
// static inline uint
@@ -3250,22 +2047,28 @@
//ulong x = op
SDValue x = Op;
// uint xhi = x >> 32
- SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ x);
// uint xlo = x & 0xFFFFFFFF
- SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ x);
// uint zhi = __clz_32bit(xhi)
SDValue zhi = genCLZu32(xhi, DAG);
// uint zlo = __clz_32bit(xlo)
SDValue zlo = genCLZu32(xlo, DAG);
// uint cmp = zhi == 32
- SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zhi, DAG.getConstant(32U, INTTY));
+ SDValue cmp = DAG.getSetCC(DL, INTTY, zhi, DAG.getConstant(32U,
+ INTTY),
+ ISD::SETEQ);
// uint zlop32 = 32 + zlo
SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
DAG.getConstant(32U, INTTY), zlo);
// return cmp ? zlop32: zhi
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
+ DST = DAG.getSelect(DL, INTTY, cmp, zlop32, zhi);
} else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
// HD4XXX:
// static inline uint
@@ -3293,7 +2096,10 @@
SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
DAG.getConstant(0x7fffffU, INTTY));
// uint ix = (uint)x
- SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
+ SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ x);
// uint xm23 = ix & 0x7FFFFF
SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
DAG.getConstant(0x7fffffU, INTTY));
@@ -3309,24 +2115,20 @@
SDValue const18 = DAG.getConstant(18, INTTY);
SDValue const41 = DAG.getConstant(41, INTTY);
// uint cmp1 = zh = 18
- SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zhm5, const18);
+ SDValue cmp1 = DAG.getSetCC(DL, INTTY, zhm5, const18, ISD::SETEQ);
// uint zhm5zm = zhm5 + zh
SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
// uint cmp2 = zhm5zm == 41
- SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- zhm5zm, const41);
+ SDValue cmp2 = DAG.getSetCC(DL, INTTY, zhm5zm, const41, ISD::SETEQ);
// uint zmp18 = zhm5 + 18
SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
// uint zlp41 = zl + 41
SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
// uint r = cmp1 ? zmp18 : zh
- SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
- cmp1, zmp18, zhm5);
+ SDValue r = DAG.getSelect(DL, INTTY,
+ cmp1, zmp18, zhm5);
// return cmp2 ? zlp41 : r
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
+ DST = DAG.getSelect(DL, INTTY, cmp2, zlp41, r);
} else {
assert(0 && "Attempting to generate a CLZ function with an"
" unknown graphics card");
@@ -3334,6 +2136,151 @@
return DST;
}
SDValue
+AMDILTargetLowering::genf32toi64(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ EVT LHSVT = (isVec) ? MVT::v2i64 : MVT::i64;
+ EVT INTVT = (isVec) ? MVT::v2i32 : MVT::i32;
+ //cf2ul(float f)
+ //{
+ // float fh = f * 0x1.0p-32f;
+ // uint uh = (uint)fh;
+ // float fuh = (float)uh;
+ // float fl = mad(-0x1.0p+32f, fuh, f);
+ // uint ul = (uint)fl;
+ // return ((ulong)uh << 32) | (ulong)ul;
+ //}
+ // Signed
+ //cf2l(float f)
+ //{
+ // int s = as_int(f) & 0x80000000;
+ // ulong u = cf2ul(as_float(as_uint(f) ^ s));
+ // long ls = s ? -1L : 0L;
+ // return ((long)u + ls) ^ ls;
+ //}
+ SDValue fh, uh, fuh, fl, ul, r, s, f;
+ f = RHS;
+ if (includeSign) {
+ SDValue fi = DAG.getNode(ISDBITCAST, DL, INTVT, f);
+ s = DAG.getNode(ISD::AND, DL, INTVT,
+ fi, DAG.getConstant(0x80000000, INTVT));
+ f = DAG.getNode(ISDBITCAST, DL, RHSVT,
+ DAG.getNode(ISD::XOR, DL, INTVT, fi, s));
+ }
+ fh = DAG.getNode(ISD::FMUL, DL, RHSVT,
+ DAG.getNode(ISD::BITCAST, DL, RHSVT,
+ DAG.getConstant(0x2F800000, INTVT)), f);
+ uh = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, fh);
+ fuh = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uh);
+ fl = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, RHSVT,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_mad, MVT::i32),
+ DAG.getNode(ISD::BITCAST, DL, RHSVT,
+ DAG.getConstant(0xCF800000, INTVT)), fuh, f);
+ ul = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, fl);
+ r = DAG.getNode(ISD::OR, DL, LHSVT,
+ DAG.getNode(ISD::SHL, DL, LHSVT,
+ DAG.getZExtOrTrunc(uh, DL,
+ LHSVT),
+ DAG.getConstant(32, LHSVT)),
+ DAG.getZExtOrTrunc(ul, DL, LHSVT));
+ if (includeSign) {
+ SDValue ls = DAG.getSelect(DL, LHSVT,
+ DAG.getZExtOrTrunc(s, DL, LHSVT),
+ DAG.getConstant(-1L, LHSVT),
+ DAG.getConstant(0L, LHSVT));
+ r = DAG.getNode(ISD::ADD, DL, LHSVT, r, ls);
+ r = DAG.getNode(ISD::XOR, DL, LHSVT, r, ls);
+ }
+ return r;
+}
+SDValue
+AMDILTargetLowering::geni64tof32(SDValue RHS, SelectionDAG &DAG,
+ bool includeSign) const
+{
+ DebugLoc DL = RHS.getDebugLoc();
+ EVT RHSVT = RHS.getValueType();
+ bool isVec = RHSVT.isVector();
+ EVT LHSVT = (isVec) ? MVT::v2f32 : MVT::f32;
+ EVT INTVT = (isVec) ? MVT::v2i32 : MVT::i32;
+ // Unsigned
+ // cul2f(ulong u)
+ //{
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ //}
+ // Signed
+ // cl2f(long l)
+ //{
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ //}
+ SDValue l = RHS;
+ SDValue s;
+ if (includeSign) {
+ s = DAG.getNode(ISD::SRA, DL, RHSVT, l,
+ DAG.getConstant(63, RHSVT));
+ SDValue s_add = DAG.getNode(ISD::ADD, DL, RHSVT,
+ l, s);
+ l = DAG.getNode(ISD::XOR, DL, RHSVT, s_add, s);
+ }
+ SDValue lz = genCLZu64(l, DAG);
+ SDValue e = DAG.getSelect(DL, INTVT,
+ DAG.getZExtOrTrunc(
+ DAG.getSetCC(DL, getSetCCResultType(RHSVT), l,
+ DAG.getConstant(0,
+ RHSVT), ISD::SETNE),
+ DL, INTVT),
+ DAG.getNode(ISD::SUB, DL, INTVT,
+ DAG.getConstant(127U + 63U, INTVT), lz),
+ DAG.getConstant(0, INTVT));
+ SDValue u = DAG.getNode(ISD::AND, DL, RHSVT,
+ DAG.getNode(ISD::SHL, DL, RHSVT, l, lz),
+ DAG.getConstant((-1ULL) >> 1, RHSVT));
+ SDValue t = DAG.getNode(ISD::AND, DL, RHSVT, u,
+ DAG.getConstant(0xffffffffffULL, RHSVT));
+ SDValue v = DAG.getNode(ISD::OR, DL, INTVT,
+ DAG.getNode(ISD::SHL, DL, INTVT, e,
+ DAG.getConstant(23, INTVT)),
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SRL, DL, RHSVT, u,
+ DAG.getConstant(40, RHSVT)),
+ DL, INTVT));
+ SDValue r_cmp = DAG.getZExtOrTrunc(
+ DAG.getSetCC(DL, getSetCCResultType(RHSVT), t,
+ DAG.getConstant(0x8000000000ULL, RHSVT),
+ ISD::SETUGT), DL, INTVT);
+ SDValue t_cmp = DAG.getZExtOrTrunc(
+ DAG.getSetCC(DL, getSetCCResultType(RHSVT), t,
+ DAG.getConstant(0x8000000000ULL, RHSVT),
+ ISD::SETEQ), DL, INTVT);
+ SDValue r = DAG.getSelect(DL, INTVT,
+ r_cmp, DAG.getConstant(1U, INTVT),
+ DAG.getSelect(DL, INTVT, t_cmp,
+ DAG.getNode(ISD::AND, DL, INTVT, v,
+ DAG.getConstant(1U, INTVT)),
+ DAG.getConstant(0U, INTVT)));
+ r = DAG.getNode(ISDBITCAST, DL, LHSVT,
+ DAG.getNode(ISD::ADD, DL, INTVT, v, r));
+ if (includeSign) {
+ SDValue r_neg = DAG.getNode(ISD::FSUB, DL, LHSVT,
+ DAG.getConstantFP(0, LHSVT), r);
+ r = DAG.getSelect(DL, getSetCCResultType(LHSVT),
+ DAG.getSExtOrTrunc(s, DL, getSetCCResultType(LHSVT))
+ , r_neg, r);
+ }
+ return r;
+}
+SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
bool includeSign) const
{
@@ -3353,7 +2300,7 @@
INTVT = EVT(MVT::i32);
}
const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ &this->getTargetMachine())->getSubtargetImpl();
if (0 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
// unsigned version:
// uint uhi = (uint)(d * 0x1.0p-32);
@@ -3377,16 +2324,22 @@
SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
val = 0xC1F0000000000000ULL;
dval = *(double*)&val;
- ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
- DAG.getConstantFP(dval, RHSVT), d);
+ ulod = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, RHSVT,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_mad, MVT::i32),
+ ulod, DAG.getConstantFP(dval, RHSVT), d);
SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
- SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
+ SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ ulo,
+ uhi);
if (includeSign) {
- SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
- SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
- RHS, d);
- l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
+ SDValue nl =
+ DAG.getNode(ISD::XOR, DL, LONGVT, l, DAG.getConstant(~0ULL, LONGVT));
+ SDValue c = DAG.getSetCC(DL, getSetCCResultType(
+ RHSVT), RHS, d, ISD::SETEQ);
+ l = DAG.getSelect(DL, LONGVT, c, l, nl);
}
DST = l;
} else {
@@ -3452,8 +2405,14 @@
// Convert d in to 32-bit components
SDValue d = RHS;
SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
- SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTVT,
+ x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTVT,
+ x );
// Generate 'normalized' mantissa
SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
@@ -3471,12 +2430,12 @@
e, DAG.getConstant( 0x7ff, INTVT ) );
SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
DAG.getConstant( 1023 + 63, INTVT ), e );
- SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- sr, DAG.getConstant(64, INTVT));
- SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- sr, DAG.getConstant(32, INTVT));
+ SDValue srge64 = DAG.getSetCC(DL, INTVT, sr, DAG.getConstant(64,
+ INTVT),
+ ISD::SETGE);
+ SDValue srge32 = DAG.getSetCC(DL, INTVT, sr, DAG.getConstant(32,
+ INTVT),
+ ISD::SETGE);
// Compute result for 0 <= sr < 32
SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
@@ -3484,27 +2443,35 @@
temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
- rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );
+ rlo0 = DAG.getNode( ISD::SELECT, DL, INTVT, sr, temp, rlo0 );
// Compute result for 32 <= sr
SDValue rhi1 = DAG.getConstant( 0, INTVT );
- SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ SDValue rlo1 = DAG.getNode( ISD::SELECT, DL, INTVT,
srge64, rhi1, rhi0 );
// Pick between the 2 results
- SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ SDValue rhi = DAG.getNode( ISD::SELECT, DL, INTVT,
srge32, rhi1, rhi0 );
- SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ SDValue rlo = DAG.getNode( ISD::SELECT, DL, INTVT,
srge32, rlo1, rlo0 );
// Create long
- SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
+ SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ rlo,
+ rhi );
// Deal with sign bit
if (includeSign) {
SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
xhi, DAG.getConstant( 31, INTVT ) );
- sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
+ sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ sign,
+ sign );
res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
}
@@ -3573,8 +2540,14 @@
// Convert d in to 32-bit components
SDValue d = RHS;
SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
- SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
+ SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTVT,
+ x );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTVT,
+ x );
// Generate 'normalized' mantissa
SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
@@ -3591,13 +2564,13 @@
e, DAG.getConstant( 0x7ff, INTVT ) );
SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
DAG.getConstant( 1023 + 31, INTVT ), e );
- SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- sr, DAG.getConstant(32, INTVT));
+ SDValue srge32 = DAG.getSetCC(DL, INTVT, sr, DAG.getConstant(32,
+ INTVT),
+ ISD::SETGE);
// Compute result for 0 <= sr < 32
SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
- res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
+ res = DAG.getNode( ISD::SELECT, DL, INTVT,
srge32, DAG.getConstant(0,INTVT), res );
// Deal with sign bit
@@ -3624,7 +2597,7 @@
(&this->getTargetMachine());
const AMDILSubtarget*
stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
+ amdtm->getSubtargetImpl());
if (RST == MVT::f64 && RHSVT.isVector()) {
// We dont support vector 64bit floating point convertions.
for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
@@ -3640,8 +2613,8 @@
}
} else if (RST == MVT::f64
&& LST == MVT::i32) {
- if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()
- && stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ if (!RHSVT.isVector() &&
+ stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
DST = SDValue(Op.getNode(), 0);
} else {
DST = genf64toi32(RHS, DAG, true);
@@ -3651,19 +2624,21 @@
DST = genf64toi64(RHS, DAG, true);
} else if (RST == MVT::f64
&& (LST == MVT::i8 || LST == MVT::i16)) {
- if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()) {
+ if (!RHSVT.isVector()) {
DST = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32, RHS);
DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, DST);
} else {
SDValue ToInt = genf64toi32(RHS, DAG, true);
DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
}
+ } else if (RST == MVT::f32
+ && LST == MVT::i64) {
+ DST = genf32toi64(RHS, DAG, true);
} else {
DST = SDValue(Op.getNode(), 0);
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
{
@@ -3679,7 +2654,7 @@
(&this->getTargetMachine());
const AMDILSubtarget*
stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
+ amdtm->getSubtargetImpl());
if (RST == MVT::f64 && RHSVT.isVector()) {
// We dont support vector 64bit floating point convertions.
for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
@@ -3692,12 +2667,11 @@
DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
DST, op, DAG.getTargetConstant(x, MVT::i32));
}
-
}
} else if (RST == MVT::f64
&& LST == MVT::i32) {
- if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()
- && stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
+ if (!RHSVT.isVector() &&
+ stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
DST = SDValue(Op.getNode(), 0);
} else {
DST = genf64toi32(RHS, DAG, false);
@@ -3707,13 +2681,16 @@
DST = genf64toi64(RHS, DAG, false);
} else if (RST == MVT::f64
&& (LST == MVT::i8 || LST == MVT::i16)) {
- if (stm->calVersion() >= CAL_VERSION_SC_155 && !RHSVT.isVector()) {
+ if (!RHSVT.isVector()) {
DST = DAG.getNode(ISD::FP_TO_UINT, DL, MVT::i32, RHS);
DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, DST);
} else {
SDValue ToInt = genf64toi32(RHS, DAG, false);
DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
}
+ } else if (RST == MVT::f32
+ && LST == MVT::i64) {
+ DST = genf32toi64(RHS, DAG, false);
} else {
DST = SDValue(Op.getNode(), 0);
}
@@ -3743,47 +2720,20 @@
(&this->getTargetMachine());
const AMDILSubtarget*
stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
- if (stm->calVersion() >= CAL_VERSION_SC_135) {
- // unsigned x = RHS;
- // ulong xd = (ulong)(0x4330_0000 << 32) | x;
- // double d = as_double( xd );
- // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
- SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
- DAG.getConstant( 0x43300000, INTVT ) );
- SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
- SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
- DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
- return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
- } else {
- SDValue clz = genCLZu32(x, DAG);
-
- // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
- // Except for an input 0... which requires a 0 exponent
- SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( (1023+31), INTVT), clz );
- exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );
-
- // Normalize frac
- SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );
-
- // Eliminate hidden bit
- rhi = DAG.getNode( ISD::AND, DL, INTVT,
- rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
-
- // Pack exponent and frac
- SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
- rhi, DAG.getConstant( (32 - 11), INTVT ) );
- rhi = DAG.getNode( ISD::SRL, DL, INTVT,
- rhi, DAG.getConstant( 11, INTVT ) );
- exp = DAG.getNode( ISD::SHL, DL, INTVT,
- exp, DAG.getConstant( 20, INTVT ) );
- rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
-
- // Convert 2 x 32 in to 1 x 64, then to double precision float type
- SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
- return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
- }
+ amdtm->getSubtargetImpl());
+ // unsigned x = RHS;
+ // ulong xd = (ulong)(0x4330_0000 << 32) | x;
+ // double d = as_double( xd );
+ // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ x,
+ DAG.getConstant( 0x43300000, INTVT ) );
+ SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
+ SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
+ DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
+ return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
}
SDValue
AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
@@ -3803,104 +2753,55 @@
LONGVT = RHSVT;
SDValue x = RHS;
const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ &this->getTargetMachine())->getSubtargetImpl();
if (0 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
// double dhi = (double)(as_uint2(x).y);
// double dlo = (double)(as_uint2(x).x);
// return mad(dhi, 0x1.0p+32, dlo)
- SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
+ SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTVT,
+ x);
dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
- SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
+ SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTVT,
+ x);
dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
uint64_t val = 0x41f0000000000000ULL;
double dval = *(double*)&val;
- return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
- DAG.getConstantFP(dval, LHSVT), dlo);
- } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
+ return DAG.getNode(ISD::INTRINSIC_W_CHAIN,
+ DL, LHSVT, DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_mad, MVT::i32),
+ dhi, DAG.getConstantFP(dval, LHSVT), dlo);
+ } else {
// double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
// double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
// return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
- SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
+ SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTVT,
+ x ); // x & 0xffff_ffffUL
+ SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ xlo,
+ DAG.getConstant( 0x43300000, INTVT ) );
SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
- SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
- SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
+ SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTVT,
+ x ); // x >> 32
+ SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ xhi,
+ DAG.getConstant( 0x45300000, INTVT ) );
SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
-
- } else {
- SDValue clz = genCLZu64(x, DAG);
- SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
- SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
-
- // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
- SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( (1023+63), INTVT), clz );
- SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
- exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- mash, exp, mash ); // exp = exp, or 0 if input was 0
-
- // Normalize frac
- SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
- clz, DAG.getConstant( 31, INTVT ) );
- SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
- DAG.getConstant( 32, INTVT ), clz31 );
- SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
- SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
- t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
- SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
- SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
- SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
- SDValue rlo2 = DAG.getConstant( 0, INTVT );
- SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
- clz, DAG.getConstant( 32, INTVT ) );
- SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- clz32, rhi2, rhi1 );
- SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
- clz32, rlo2, rlo1 );
-
- // Eliminate hidden bit
- rhi = DAG.getNode( ISD::AND, DL, INTVT,
- rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
-
- // Save bits needed to round properly
- SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
- rlo, DAG.getConstant( 0x7ff, INTVT ) );
-
- // Pack exponent and frac
- rlo = DAG.getNode( ISD::SRL, DL, INTVT,
- rlo, DAG.getConstant( 11, INTVT ) );
- SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
- rhi, DAG.getConstant( (32 - 11), INTVT ) );
- rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
- rhi = DAG.getNode( ISD::SRL, DL, INTVT,
- rhi, DAG.getConstant( 11, INTVT ) );
- exp = DAG.getNode( ISD::SHL, DL, INTVT,
- exp, DAG.getConstant( 20, INTVT ) );
- rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
-
- // Compute rounding bit
- SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
- rlo, DAG.getConstant( 1, INTVT ) );
- SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
- round, DAG.getConstant( 0x3ff, INTVT ) );
- grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
- DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
- grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
- grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
- round = DAG.getNode( ISD::SRL, DL, INTVT,
- round, DAG.getConstant( 10, INTVT ) );
- round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
-
- // Add rounding bit
- SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
- round, DAG.getConstant( 0, INTVT ) );
- SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
- res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
- return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
}
}
SDValue
@@ -3920,7 +2821,7 @@
(&this->getTargetMachine());
const AMDILSubtarget*
stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
+ amdtm->getSubtargetImpl());
if (LST == MVT::f64 && LHSVT.isVector()) {
// We dont support vector 64bit floating point convertions.
DST = Op;
@@ -3934,12 +2835,10 @@
DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
op, DAG.getTargetConstant(x, MVT::i32));
}
-
}
} else if (RST == MVT::i32
&& LST == MVT::f64) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX
- && stm->calVersion() >= CAL_VERSION_SC_155) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
DST = SDValue(Op.getNode(), 0);
} else {
DST = genu32tof64(RHS, LHSVT, DAG);
@@ -3947,12 +2846,14 @@
} else if (RST == MVT::i64
&& LST == MVT::f64) {
DST = genu64tof64(RHS, LHSVT, DAG);
+ } else if (RST == MVT::i64
+ && LST == MVT::f32) {
+ DST = geni64tof32(RHS, DAG, false);
} else {
DST = SDValue(Op.getNode(), 0);
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
@@ -3971,7 +2872,7 @@
(&this->getTargetMachine());
const AMDILSubtarget*
stm = dynamic_cast<const AMDILSubtarget*>(
- amdtm->getSubtargetImpl());
+ amdtm->getSubtargetImpl());
if (LST == MVT::f64 && LHSVT.isVector()) {
// We dont support vector 64bit floating point convertions.
for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
@@ -3985,9 +2886,10 @@
op, DAG.getTargetConstant(x, MVT::i32));
}
}
-
+ } else if (RST == MVT::i64
+ && LST == MVT::f32) {
+ DST = geni64tof32(RHS, DAG, true);
} else {
-
if (isVec) {
LONGVT = EVT(MVT::getVectorVT(MVT::i64,
RHSVT.getVectorNumElements()));
@@ -4001,8 +2903,7 @@
if ((RST == MVT::i32 || RST == MVT::i64)
&& LST == MVT::f64) {
if (RST == MVT::i32) {
- if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX
- && stm->calVersion() >= CAL_VERSION_SC_155) {
+ if (stm->device()->getGeneration() > AMDILDeviceInfo::HD4XXX) {
DST = SDValue(Op.getNode(), 0);
return DST;
}
@@ -4016,10 +2917,18 @@
Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
} else { // 64-bit case... SRA of 64-bit values is slow
- SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
+ SDValue hi = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTVT,
+ RHS );
Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
- S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
+ S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ RHSVT,
+ temp,
+ temp );
}
// get abs() of input value, given sign as S (0 or -1)
@@ -4040,10 +2949,22 @@
// drop in the sign bit
SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
- SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
- SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
+ SDValue thi = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTVT,
+ t );
+ SDValue tlo = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTVT,
+ t );
thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
- t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
+ t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ LONGVT,
+ tlo,
+ thi );
DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
} else {
DST = SDValue(Op.getNode(), 0);
@@ -4073,35 +2994,45 @@
}
SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
// TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
- LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
- RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
- LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
- RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
+ LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ LHS);
+ RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ RHS);
+ LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ LHS);
+ RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ RHS);
INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
//TODO: need to use IBORROW on HD5XXX and later hardware
SDValue cmp;
if (OVT == MVT::i64) {
- cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- LHSLO, RHSLO);
+ cmp = DAG.getSetCC(DL, INTTY, LHSLO, RHSLO, ISD::SETULT);
} else {
SDValue cmplo;
SDValue cmphi;
SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
+ DL, MVT::i32, LHSLO,
+ DAG.getTargetConstant(0, MVT::i32));
SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
+ DL, MVT::i32, LHSLO,
+ DAG.getTargetConstant(1, MVT::i32));
SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
+ DL, MVT::i32, RHSLO,
+ DAG.getTargetConstant(0, MVT::i32));
SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
- cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- LHSRLO, RHSRLO);
- cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
- DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
- LHSRHI, RHSRHI);
+ DL, MVT::i32, RHSLO,
+ DAG.getTargetConstant(1, MVT::i32));
+ cmplo = DAG.getSetCC(DL, MVT::i32, LHSRLO, RHSRLO, ISD::SETULT);
+ cmphi = DAG.getSetCC(DL, MVT::i32, LHSRHI, RHSRHI, ISD::SETULT);
cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
@@ -4128,7 +3059,6 @@
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
@@ -4146,7 +3076,6 @@
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
{
@@ -4164,7 +3093,6 @@
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
@@ -4183,7 +3111,6 @@
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
{
@@ -4202,7 +3129,6 @@
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
@@ -4210,10 +3136,12 @@
EVT OVT = Op.getValueType();
SDValue DST;
bool isVec = OVT.isVector();
- if (OVT.getScalarType() != MVT::i64) {
+ if (OVT.getScalarType() != MVT::i64)
+ {
DST = SDValue(Op.getNode(), 0);
} else {
- assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
+ assert(
+ OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
// TODO: This needs to be turned into a tablegen pattern
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -4223,18 +3151,26 @@
INTTY = MVT::v2i32;
}
// mul64(h1, l1, h0, l0)
- SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
- DL,
- INTTY, LHS);
- SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
- DL,
- INTTY, LHS);
- SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
- DL,
- INTTY, RHS);
- SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
- DL,
- INTTY, RHS);
+ SDValue LHSLO = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ LHS);
+ SDValue LHSHI = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ LHS);
+ SDValue RHSLO = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ INTTY,
+ RHS);
+ SDValue RHSHI = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ INTTY,
+ RHS);
// MULLO_UINT_1 r1, h0, l1
SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
DL,
@@ -4296,43 +3232,42 @@
fourth = Op.getOperand(3);
if (fourth.getOpcode() != ISD::UNDEF) {
Nodes1 = DAG.getNode(
- ISD::INSERT_VECTOR_ELT,
- DL,
- Op.getValueType(),
- Nodes1,
- fourth,
- DAG.getConstant(7, MVT::i32));
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ fourth,
+ DAG.getConstant(7, MVT::i32));
}
case 3:
third = Op.getOperand(2);
if (third.getOpcode() != ISD::UNDEF) {
Nodes1 = DAG.getNode(
- ISD::INSERT_VECTOR_ELT,
- DL,
- Op.getValueType(),
- Nodes1,
- third,
- DAG.getConstant(6, MVT::i32));
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ third,
+ DAG.getConstant(6, MVT::i32));
}
case 2:
second = Op.getOperand(1);
if (second.getOpcode() != ISD::UNDEF) {
Nodes1 = DAG.getNode(
- ISD::INSERT_VECTOR_ELT,
- DL,
- Op.getValueType(),
- Nodes1,
- second,
- DAG.getConstant(5, MVT::i32));
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ second,
+ DAG.getConstant(5, MVT::i32));
}
break;
};
return Nodes1;
}
-
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const
+ SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -4375,20 +3310,19 @@
DL, VT, Op.getOperand(0), *ptr,
DAG.getTargetConstant(mask2, MVT::i32),
DAG.getTargetConstant(mask3, MVT::i32));
- SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
- DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
- Op.getOperand(2), DAG.getConstant(x, MVT::i32));
+ SDValue c = DAG.getSetCC(DL, getSetCCResultType(ptr->getValueType()),
+ Op.getOperand(2),
+ DAG.getConstant(x - 1, MVT::i32), ISD::SETEQ);
c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
- res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
+ res = DAG.getSelect(DL, VT, c, t, res);
}
DST = res;
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const
+ SelectionDAG &DAG) const
{
EVT VT = Op.getValueType();
//printSDValue(Op, 1);
@@ -4421,22 +3355,19 @@
SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
DL, VT, Op0,
DAG.getTargetConstant(x, MVT::i32));
- SDValue c = DAG.getNode(AMDILISD::CMP,
- DL, Op1.getValueType(),
- DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
- Op1, DAG.getConstant(x, MVT::i32));
- res = DAG.getNode(AMDILISD::CMOVLOG, DL,
- VT, c, t, res);
-
+ SDValue c = DAG.getSetCC(DL, getSetCCResultType(Op1.getValueType()),
+ Op1, DAG.getConstant(x - 1,
+ MVT::i32), ISD::SETEQ);
+ res = DAG.getSelect(DL,
+ VT, c, t, res);
}
Res = res;
}
return Res;
}
-
SDValue
AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
- SelectionDAG &DAG) const
+ SelectionDAG &DAG) const
{
uint32_t vecSize = Op.getValueType().getVectorNumElements();
SDValue src = Op.getOperand(0);
@@ -4481,7 +3412,7 @@
}
SDValue
AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const
+ SelectionDAG &DAG) const
{
SDValue Res = DAG.getNode(AMDILISD::VBUILD,
Op.getDebugLoc(),
@@ -4490,27 +3421,42 @@
return Res;
}
SDValue
-AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
- SDValue andOp;
- andOp = DAG.getNode(
- AMDILISD::AND,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
- return andOp;
-}
-SDValue
-AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue orOp;
- orOp = DAG.getNode(AMDILISD::OR,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
- return orOp;
+ ISD::CondCode CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OpVT = Op.getValueType();
+ if (!OpVT.isVector()) return SDValue();
+ EVT OpSVT = OpVT.getScalarType();
+ EVT SVT = LHS.getValueType().getScalarType();
+ EVT ccSVT = getSetCCResultType(SVT);
+ assert((SVT == MVT::f64 || SVT == MVT::i64) &&
+ "we don't support expansion of SetCC on non-64bit types!");
+ SDValue ccOp;
+ for (unsigned x = 0, y = OpVT.getVectorNumElements(); x < y; ++x) {
+ SDValue lhsComp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT,
+ LHS, DAG.getTargetConstant(x, MVT::i32));
+ SDValue rhsComp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT,
+ RHS, DAG.getTargetConstant(x, MVT::i32));
+ SDValue opComp = DAG.getSetCC(DL, ccSVT, lhsComp, rhsComp, CC);
+ // Need to handle the case where we are splitting up a
+ // setCC where the result is less than 32bits.
+ if (ccSVT != OpSVT && SVT.isInteger()) {
+ opComp = DAG.getSExtOrTrunc(opComp, DL, OpSVT);
+ }
+ if (!x) {
+ ccOp = DAG.getNode(AMDILISD::VBUILD, DL, OpVT, opComp);
+ } else {
+ ccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT,
+ ccOp, opComp, DAG.getTargetConstant(x, MVT::i32));
+ }
+ }
+ if (OpSVT != SVT) {
+ ccOp = DAG.getSExtOrTrunc(ccOp, DL, OpVT);
+ }
+ return ccOp;
}
SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
@@ -4519,11 +3465,10 @@
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
DebugLoc DL = Op.getDebugLoc();
- Cond = getConversionNode(DAG, Cond, Op, true);
- Cond = DAG.getNode(AMDILISD::CMOVLOG,
- DL,
- Op.getValueType(), Cond, LHS, RHS);
- return Cond;
+ if (LHS.getValueType().isVector()) {
+ return DAG.getNode(ISD::VSELECT, DL, Op.getValueType(), Cond, LHS, RHS);
+ }
+ return SDValue();
}
SDValue
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
@@ -4542,74 +3487,36 @@
// Check for possible elimination of cmov
if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
const ConstantSDNode *trueConst
- = dyn_cast<ConstantSDNode>( TRUE.getNode() );
+ = dyn_cast<ConstantSDNode>( TRUE.getNode() );
const ConstantSDNode *falseConst
- = dyn_cast<ConstantSDNode>( FALSE.getNode() );
+ = dyn_cast<ConstantSDNode>( FALSE.getNode() );
if (trueConst && falseConst) {
// both possible result values are constants
if (trueConst->isAllOnesValue()
&& falseConst->isNullValue()) { // and convenient constants
skipCMov = true;
- } else if (trueConst->isNullValue()
- && falseConst->isAllOnesValue()) { // less convenient
+ }
+ else if (trueConst->isNullValue()
+ && falseConst->isAllOnesValue()) { // less convenient
skipCMov = true;
genINot = true;
}
}
}
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- unsigned int AMDILCC = CondCCodeToCC(
- SetCCOpcode,
- LHS.getValueType().getSimpleVT().SimpleTy);
- assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
- Cond = DAG.getNode(
- AMDILISD::CMP,
- DL,
- LHS.getValueType(),
- DAG.getConstant(AMDILCC, MVT::i32),
- LHS,
- RHS);
- Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getSetCC(DL, getSetCCResultType(LHS.getValueType()),
+ LHS, RHS, SetCCOpcode);
if (genINot) {
- Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
+ Cond = DAG.getNode(ISD::XOR, DL, OVT, Cond, DAG.getConstant(-1, OVT));
}
if (!skipCMov) {
- Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
+ Cond = DAG.getSelect(DL, OVT, Cond, TRUE, FALSE);
}
return Cond;
}
SDValue
-AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Cond;
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(2);
- DebugLoc DL = Op.getDebugLoc();
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- unsigned int AMDILCC = CondCCodeToCC(
- SetCCOpcode,
- LHS.getValueType().getSimpleVT().SimpleTy);
- assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
- Cond = DAG.getNode(
- AMDILISD::CMP,
- DL,
- LHS.getValueType(),
- DAG.getConstant(AMDILCC, MVT::i32),
- LHS,
- RHS);
- Cond = getConversionNode(DAG, Cond, Op, true);
- Cond = DAG.getNode(
- ISD::AND,
- DL,
- Cond.getValueType(),
- DAG.getConstant(1, Cond.getValueType()),
- Cond);
- return Cond;
-}
-
-SDValue
-AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const
{
SDValue Data = Op.getOperand(0);
VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
@@ -4661,7 +3568,6 @@
}
}
}
-
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
@@ -4733,16 +3639,28 @@
w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
- Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
+ Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
+ DL,
+ MVT::i64,
+ x,
+ y);
return Res;
} else {
// case 2
- SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
+ SDValue lo = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
+ DL,
+ MVT::i32,
+ Src);
SDValue lor16
- = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
- SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
+ SDValue hi = DAG.getNode(
+ (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
+ DL,
+ MVT::i32,
+ Src);
SDValue hir16
- = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
+ = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
MVT::v4i32, lo);
SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
@@ -4787,7 +3705,7 @@
SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
- DAG.getConstant(0, IntTy));
+ DAG.getConstant(0, IntTy.getScalarType()));
for (uint32_t x = 0; x < SrcNumEle; ++x) {
SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
MVT::i32, DAG.getConstant(x, MVT::i32));
@@ -4800,7 +3718,8 @@
}
#endif
SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
- DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
+ DAG.getConstant((1 << ScalarSrcSize) - 1,
+ MVT::i32));
SDValue *newEle = new SDValue[SrcNumEle];
res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
for (uint32_t x = 0; x < SrcNumEle; ++x) {
@@ -4819,12 +3738,14 @@
}
for (uint32_t x = 0; x < SrcNumEle; x += 2) {
newEle[x] = DAG.getNode(ISD::OR, DL,
- IntTy.getScalarType(), newEle[x], newEle[x + 1]);
+ IntTy.getScalarType(), newEle[x],
+ newEle[x + 1]);
}
if (ScalarSrcSize == 8) {
for (uint32_t x = 0; x < SrcNumEle; x += 4) {
newEle[x] = DAG.getNode(ISD::OR, DL,
- IntTy.getScalarType(), newEle[x], newEle[x + 2]);
+ IntTy.getScalarType(), newEle[x],
+ newEle[x + 2]);
}
if (DstNumEle == 1) {
Dst = newEle[0];
@@ -4858,7 +3779,7 @@
// case 2:
EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
- DAG.getConstant(0, IntTy));
+ DAG.getConstant(0, IntTy.getScalarType()));
uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
for (uint32_t x = 0; x < SrcNumEle; ++x) {
for (uint32_t y = 0; y < mult; ++y) {
@@ -4920,9 +3841,11 @@
SDValue *numEle = new SDValue[DstNumEle];
for (uint32_t x = 0; x < SrcNumEle; ++x) {
numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- MVT::i16, Src, DAG.getConstant(x, MVT::i32));
+ MVT::i16, Src,
+ DAG.getConstant(x, MVT::i32));
numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
- numEle[x * 2], DAG.getConstant(8, MVT::i16));
+ numEle[x * 2],
+ DAG.getConstant(8, MVT::i16));
}
MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
@@ -4944,10 +3867,9 @@
Dst.getValueType(), Src);
return Res;
}
-
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const
+ SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -4963,24 +3885,24 @@
DL,
SPReg, NewSP);
SDValue Ops[2] = {NewSP, Chain};
- Chain = DAG.getMergeValues(Ops, 2 ,DL);
+ Chain = DAG.getMergeValues(Ops, 2,DL);
return Chain;
}
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
+ SDValue Entry = Op.getOperand(1);
SDValue Cond = Op.getOperand(1);
SDValue Jump = Op.getOperand(2);
SDValue Result;
Result = DAG.getNode(
- AMDILISD::BRANCH_COND,
- Op.getDebugLoc(),
- Op.getValueType(),
- Chain, Jump, Cond);
+ AMDILISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
return Result;
}
-
SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
@@ -4992,44 +3914,35 @@
SDValue CmpValue;
ISD::CondCode CC = CCNode->get();
SDValue Result;
- unsigned int cmpOpcode = CondCCodeToCC(
- CC,
- LHS.getValueType().getSimpleVT().SimpleTy);
- CmpValue = DAG.getNode(
- AMDILISD::CMP,
- Op.getDebugLoc(),
- LHS.getValueType(),
- DAG.getConstant(cmpOpcode, MVT::i32),
- LHS, RHS);
+ CmpValue = DAG.getSetCC(Op.getDebugLoc(), getSetCCResultType(LHS.getValueType()),
+ LHS, RHS, CC);
Result = DAG.getNode(
- AMDILISD::BRANCH_COND,
- CmpValue.getDebugLoc(),
- MVT::Other, Chain,
- JumpT, CmpValue);
+ AMDILISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
return Result;
}
-
SDValue
AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
{
SDValue Result = DAG.getNode(
- AMDILISD::DP_TO_FP,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
+ AMDILISD::DP_TO_FP,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
return Result;
}
-
SDValue
AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
{
SDValue Result = DAG.getNode(
- AMDILISD::VCONCAT,
- Op.getDebugLoc(),
- Op.getValueType(),
- Op.getOperand(0),
- Op.getOperand(1));
+ AMDILISD::VCONCAT,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Op.getOperand(0),
+ Op.getOperand(1));
return Result;
}
// LowerRET - Lower an ISD::RET node.
@@ -5068,7 +3981,7 @@
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain);
- RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+ RetOps.push_back(DAG.getConstant(0 /*getBytesToPopOnReturn()*/, MVT::i32));
for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
SDValue ValToCopy = OutVals[i];
@@ -5097,163 +4010,20 @@
MVT::Other, &RetOps[0], RetOps.size());
return Flag;
}
-void
-AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
- unsigned int opCode) const
-{
- MachineOperand DST = MI->getOperand(0);
- MachineOperand LHS = MI->getOperand(2);
- MachineOperand RHS = MI->getOperand(3);
- unsigned int opi32Code = 0, si32Code = 0;
- unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
- uint32_t REGS[12];
- switch (simpleVT) {
- case AMDIL::GPRI64RegClassID:
- simpleVT = AMDIL::GPRI32RegClassID;
- break;
- case AMDIL::GPRV2I64RegClassID:
- simpleVT = AMDIL::GPRV2I64RegClassID;
- break;
- };
- // All the relationals can be generated with with 6 temp registers
- for (int x = 0; x < 12; ++x) {
- REGS[x] = genVReg(simpleVT);
- }
- // Pull out the high and low components of each 64 bit register
- generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
- generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
- generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
- generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
- // Determine the correct opcode that we should use
- switch(opCode) {
- default:
- assert(!"comparison case not handled!");
- break;
- case AMDIL::LEQ:
- si32Code = opi32Code = AMDIL::IEQ;
- break;
- case AMDIL::LNE:
- si32Code = opi32Code = AMDIL::INE;
- break;
- case AMDIL::LLE:
- case AMDIL::ULLE:
- case AMDIL::LGE:
- case AMDIL::ULGE:
- if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
- std::swap(REGS[0], REGS[2]);
- } else {
- std::swap(REGS[1], REGS[3]);
- }
- if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
- opi32Code = AMDIL::ILT;
- } else {
- opi32Code = AMDIL::ULT;
- }
- si32Code = AMDIL::UGE;
- break;
- case AMDIL::LGT:
- case AMDIL::ULGT:
- std::swap(REGS[0], REGS[2]);
- std::swap(REGS[1], REGS[3]);
- case AMDIL::LLT:
- case AMDIL::ULLT:
- if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
- opi32Code = AMDIL::ILT;
- } else {
- opi32Code = AMDIL::ULT;
- }
- si32Code = AMDIL::ULT;
- break;
- };
- // Do the initial opcode on the high and low components.
- // This leaves the following:
- // REGS[4] = L_HI OP R_HI
- // REGS[5] = L_LO OP R_LO
- generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
- generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
- switch(opi32Code) {
- case AMDIL::IEQ:
- case AMDIL::INE: {
- // combine the results with an and or or depending on if
- // we are eq or ne
- uint32_t combineOp = (opi32Code == AMDIL::IEQ)
- ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
- generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
- }
- break;
- default:
- // this finishes codegen for the following pattern
- // REGS[4] || (REGS[5] && (L_HI == R_HI))
- generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
- generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
- REGS[9]);
- generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
- REGS[10]);
- break;
- }
- generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
-}
-
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
return 0;
}
-
bool
AMDILTargetLowering::isLoadBitCastBeneficial(EVT lVT, EVT bVT) const
{
return !(lVT.getSizeInBits() == bVT.getSizeInBits()
- && lVT.getScalarType().getSizeInBits() > bVT.getScalarType().getSizeInBits()
+ && lVT.getScalarType().getSizeInBits() >
+ bVT.getScalarType().getSizeInBits()
&& bVT.getScalarType().getSizeInBits() < 32
&& lVT.getScalarType().getSizeInBits() >= 32);
}
-
-void
-AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &BBI,
- DebugLoc *DL, const TargetInstrInfo *TII) const
-{
- mBB = BB;
- mBBI = BBI;
- mDL = DL;
- mTII = TII;
-}
-uint32_t
-AMDILTargetLowering::genVReg(uint32_t regType) const
-{
- return mBB->getParent()->getRegInfo().createVirtualRegister(
- getRegClassFromID(regType));
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
-{
- return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
- uint32_t src1) const
-{
- return generateMachineInst(opcode, dst).addReg(src1);
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
- uint32_t src1, uint32_t src2) const
-{
- return generateMachineInst(opcode, dst, src1).addReg(src2);
-}
-
-MachineInstrBuilder
-AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
- uint32_t src1, uint32_t src2, uint32_t src3) const
-{
- return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
-}
-
-
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
@@ -5298,7 +4068,10 @@
SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
// float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+ SDValue fq = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, FLTTY,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_div, MVT::i32),
+ fa, fb);
// fq = trunc(fq);
fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
@@ -5307,7 +4080,10 @@
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, FLTTY,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_mad, MVT::i32),
+ fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
@@ -5319,15 +4095,14 @@
fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
// int cv = fr >= fb;
- SDValue cv = DAG.getSetCC(DL, OVT, fr, fb, ISD::SETOGE);
+ SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETGE);
// jq = (cv ? jq : 0);
- jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, DAG.getConstant(0, OVT));
+ jq = DAG.getSelect(DL, OVT, cv, jq, DAG.getConstant(0, OVT));
// dst = iq + jq;
iq = DAG.getSExtOrTrunc(iq, DL, OVT);
iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
return iq;
}
-
SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
@@ -5356,15 +4131,9 @@
SDValue r1 = RHS;
// ilt r10, r0, 0
- SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- r0, DAG.getConstant(0, OVT));
-
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
// ilt r11, r1, 0
- SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- r1, DAG.getConstant(0, OVT));
-
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
@@ -5390,13 +4159,11 @@
SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
return DST;
}
-
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
return SDValue(Op.getNode(), 0);
}
-
SDValue
AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
{
@@ -5441,37 +4208,35 @@
SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
// float fq = native_divide(fa, fb)
- SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+ SDValue fq = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, FLTTY,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_div, MVT::i32),
+ fa, fb);
// fq = trunc(fq)
fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
// float t = mad(fq, fb, fb)
- SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
+ SDValue t = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, FLTTY,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_mad, MVT::i32),
+ fq, fb, fb);
// int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
SDValue iq;
fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
- if (INTTY == MVT::i32) {
- iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
- } else {
- iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
- }
- iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
-
+ iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETLE);
+ iq = DAG.getNode(ISD::SUB, DL, INTTY, fq, iq);
// return (type)iq
iq = DAG.getZExtOrTrunc(iq, DL, OVT);
return iq;
-
}
-
SDValue
AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
{
return SDValue(Op.getNode(), 0);
}
-
SDValue
AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
{
@@ -5494,7 +4259,6 @@
LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
return LHS;
}
-
SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
@@ -5512,7 +4276,6 @@
LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
return LHS;
}
-
SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
@@ -5542,14 +4305,9 @@
SDValue r1 = RHS;
// ilt r10, r0, 0
- SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- r0, DAG.getConstant(0, OVT));
-
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
// ilt r11, r1, 0
- SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
- r1, DAG.getConstant(0, OVT));
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
// iadd r0, r0, r10
r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
@@ -5579,13 +4337,11 @@
SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
return DST;
}
-
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
return SDValue(Op.getNode(), 0);
}
-
SDValue
AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
{
@@ -5626,15 +4382,15 @@
DAG.getConstant(0xFF, INTTY));
// cmov_logical r3, r11, r11, 0x1
- SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
- DAG.getConstant(0x01, INTTY));
+ SDValue r3 = DAG.getSelect(DL, INTTY, r11, r11,
+ DAG.getConstant(0x01, INTTY));
// udiv r3, r10, r3
r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
// cmov_logical r3, r11, r3, 0
- r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
- DAG.getConstant(0, INTTY));
+ r3 = DAG.getSelect(DL, INTTY, r11, r3,
+ DAG.getConstant(0, INTTY));
// umul r3, r3, r11
r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
@@ -5648,7 +4404,6 @@
DST = DAG.getZExtOrTrunc(DST, DL, OVT);
return DST;
}
-
SDValue
AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
{
@@ -5691,8 +4446,8 @@
DAG.getConstant(0xFFFF, OVT));
// cmov_logical r3, r11, r11, 0x1
- SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
- DAG.getConstant(0x01, OVT));
+ SDValue r3 = DAG.getSelect(DL, OVT, r11, r11,
+ DAG.getConstant(0x01, OVT));
// udiv as_u16(r3), as_u32(r10), as_u32(r3)
r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
@@ -5706,8 +4461,8 @@
DAG.getConstant(0xFFFF, OVT));
// cmov_logical r3, r11, r3, 0
- r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
- DAG.getConstant(0, OVT));
+ r3 = DAG.getSelect(DL, OVT, r11, r3,
+ DAG.getConstant(0, OVT));
// umul r3, r3, r11
r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
@@ -5719,7 +4474,6 @@
DAG.getConstant(0xFFFF, OVT));
return DST;
}
-
SDValue
AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
{
@@ -5742,14 +4496,11 @@
SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
return DST;
}
-
SDValue
AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
{
return SDValue(Op.getNode(), 0);
}
-
-
SDValue
AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
{
@@ -5765,7 +4516,7 @@
SDValue RHS = Op.getOperand(1);
SDValue DST;
const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
- &this->getTargetMachine())->getSubtargetImpl();
+ &this->getTargetMachine())->getSubtargetImpl();
if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
// TODO: This doesn't work for vector types yet
// The LowerFDIV32 function generates equivalent to the following
@@ -5834,25 +4585,23 @@
DAG.getConstant(0x807FFFFF, INTTY));
// ieq r40, r30, 0x7F800000
- SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R30, DAG.getConstant(0x7F800000, INTTY));
+ SDValue R40 =
+ DAG.getSetCC(DL, INTTY, R30, DAG.getConstant(0x7F800000,
+ INTTY), ISD::SETEQ);
// ieq r41, r31, 0x7F800000
- SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R31, DAG.getConstant(0x7F800000, INTTY));
+ SDValue R41 =
+ DAG.getSetCC(DL, INTTY, R31, DAG.getConstant(0x7F800000,
+ INTTY), ISD::SETEQ);
// ieq r42, r30, 0
- SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R30, DAG.getConstant(0, INTTY));
-
+ SDValue R42 = DAG.getSetCC(DL, INTTY, R30, DAG.getConstant(0,
+ INTTY),
+ ISD::SETEQ);
// ieq r43, r31, 0
- SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
- R31, DAG.getConstant(0, INTTY));
-
+ SDValue R43 = DAG.getSetCC(DL, INTTY, R31, DAG.getConstant(0,
+ INTTY),
+ ISD::SETEQ);
// and r50, r20, 0x80000000
SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
DAG.getConstant(0x80000000, INTTY));
@@ -5870,16 +4619,16 @@
DAG.getConstant(0x3F800000, INTTY));
// cmov_logical r32, r42, r50, r32
- R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
+ R32 = DAG.getSelect(DL, INTTY, R42, R50, R32);
// cmov_logical r33, r43, r51, r33
- R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
+ R33 = DAG.getSelect(DL, INTTY, R43, R51, R33);
// cmov_logical r32, r40, r20, r32
- R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
+ R32 = DAG.getSelect(DL, INTTY, R40, R20, R32);
// cmov_logical r33, r41, r21, r33
- R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
+ R33 = DAG.getSelect(DL, INTTY, R41, R21, R33);
// ior r50, r40, r41
R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
@@ -5891,18 +4640,21 @@
R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
// inegate r52, r31
- SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
+ SDValue R52 =
+ DAG.getNode(ISD::XOR, DL, INTTY, R31, DAG.getConstant(~0, INTTY));
// iadd r30, r30, r52
R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
// cmov_logical r30, r50, 0, r30
- R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
- DAG.getConstant(0, INTTY), R30);
+ R30 = DAG.getSelect(DL, INTTY, R50,
+ DAG.getConstant(0, INTTY), R30);
// div_zeroop(infinity) r21, 1.0, as_float(r33)
R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
- R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ R21 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, OVT,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_div, MVT::i32),
DAG.getConstantFP(1.0f, OVT), R33);
// mul_ieee as_int(r20), as_float(r32), r21
@@ -5912,7 +4664,9 @@
// div_zeroop(infinity) r21, 1.0, as_float(r33)
R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
- R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
+ R21 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, OVT,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_div, MVT::i32),
DAG.getConstantFP(1.0f, OVT), R33);
// mul_ieee as_int(r20), as_float(r32), r21
@@ -5946,31 +4700,23 @@
R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
// ige r42, 0, R60
- R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- DAG.getConstant(0, INTTY),
- R60);
-
+ R42 = DAG.getSetCC(DL, INTTY, DAG.getConstant(0, INTTY), R60, ISD::SETGE);
// ior r41, r23, 0x7F800000
R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
DAG.getConstant(0x7F800000, INTTY));
// ige r40, r60, 0x000000FF
- R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
- DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
- R60,
- DAG.getConstant(0x0000000FF, INTTY));
-
+ R40 = DAG.getSetCC(DL, INTTY, R60, DAG.getConstant(0xFF, INTTY), ISD::SETGE);
// cmov_logical r40, r50, 0, r40
- R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
- DAG.getConstant(0, INTTY),
- R40);
+ R40 = DAG.getSelect(DL, INTTY, R50,
+ DAG.getConstant(0, INTTY),
+ R40);
// cmov_logical r20, r42, r23, r20
- R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
+ R20 = DAG.getSelect(DL, INTTY, R42, R23, R20);
// cmov_logical DST, r40, r41, r20
- DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
+ DST = DAG.getSelect(DL, INTTY, R40, R41, R20);
// as_float(DST)
DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
@@ -5986,23 +4732,29 @@
// fabs r1, RHS
SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
// lt r2, 0x1.0p+96f, r1
- SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
- DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
- DAG.getConstant(0x6f800000, INTTY), r1);
+ SDValue cst1 = DAG.getConstant(0x6f800000, INTTY);
+ cst1 = DAG.getNode(ISDBITCAST, DL, OVT, cst1);
+ SDValue r2 = DAG.getSetCC(DL, INTTY, cst1, r1, ISD::SETLT);
// cmov_logical r3, r2, 0x1.0p-23f, 1.0f
- SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
- DAG.getConstant(0x2f800000, INTTY),
- DAG.getConstant(0x3f800000, INTTY));
+ cst1 = DAG.getConstant(0x2f800000, INTTY);
+ cst1 = DAG.getNode(ISDBITCAST, DL, OVT, cst1);
+ SDValue cst2 = DAG.getConstant(0x3f800000, INTTY);
+ cst2 = DAG.getNode(ISDBITCAST, DL, OVT, cst2);
+ SDValue r3 = DAG.getSelect(DL, OVT, r2,
+ cst1, cst2);
// mul_ieee r1, RHS, r3
r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
// div_zeroop(infinity) r0, LHS, r1
- SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
+ SDValue r0 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, OVT,
+ DAG.getEntryNode(),
+ DAG.getConstant(AMDILIntrinsic::AMDIL_div,
+ MVT::i32),
+ LHS, r1);
// mul_ieee DST, r0, r3
DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
}
return DST;
}
-
SDValue
AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
{
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILISelLowering.h Wed Sep 12 12:43:34 2012
@@ -25,74 +25,35 @@
{
namespace AMDILISD
{
-enum {
+enum
+{
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- INTTOANY, // Dummy instruction that takes an int and goes to
- // any type converts the SDNode to an int
- DP_TO_FP, // Conversion from 64bit FP to 32bit FP
- FP_TO_DP, // Conversion from 32bit FP to 64bit FP
- BITCONV, // instruction that converts from any type to any type
- CMOV, // 32bit FP Conditional move instruction
- CMOVLOG, // 32bit FP Conditional move logical instruction
- SELECT, // 32bit FP Conditional move logical instruction
- SETCC, // 32bit FP Conditional move logical instruction
- ISGN, // 32bit Int Sign instruction
- INEGATE, // 32bit Int Negation instruction
- MAD, // 32bit Fused Multiply Add instruction
- ADD, // 32/64 bit pseudo instruction
- AND, // 128 bit and instruction
- OR, // 128 bit or instruction
- NOT, // 128 bit not instruction
- XOR, // 128 bit xor instruction
- MOVE, // generic mov instruction
- PHIMOVE, // generic phi-node mov instruction
- VBUILD, // scalar to vector mov instruction
- VEXTRACT, // extract vector components
- VINSERT, // insert vector components
- VCONCAT, // concat a single vector to another vector
- UMAD, // 32bit UInt Fused Multiply Add instruction
- CALL, // Function call based on a single integer
- RET, // Return from a function call
- SELECT_CC, // Select the correct conditional instruction
- BRCC, // Select the correct branch instruction
- CMPCC, // Compare to GPR operands
- CMPICC, // Compare two GPR operands, set icc.
- CMPFCC, // Compare two FP operands, set fcc.
- BRICC, // Branch to dest on icc condition
- BRFCC, // Branch to dest on fcc condition
- SELECT_ICC, // Select between two values using the current ICC
- //flags.
- SELECT_FCC, // Select between two values using the current FCC
- //flags.
- LCREATE, // Create a 64bit integer from two 32 bit integers
- LCOMPHI, // Get the hi 32 bits from a 64 bit integer
- LCOMPLO, // Get the lo 32 bits from a 64 bit integer
- DCREATE, // Create a 64bit float from two 32 bit integers
- DCOMPHI, // Get the hi 32 bits from a 64 bit float
- DCOMPLO, // Get the lo 32 bits from a 64 bit float
- LCREATE2, // Create a 64bit integer from two 32 bit integers
- LCOMPHI2, // Get the hi 32 bits from a 64 bit integer
- LCOMPLO2, // Get the lo 32 bits from a 64 bit integer
- DCREATE2, // Create a 64bit float from two 32 bit integers
- DCOMPHI2, // Get the hi 32 bits from a 64 bit float
- DCOMPLO2, // Get the lo 32 bits from a 64 bit float
- UMUL, // 32bit unsigned multiplication
- IFFB_HI, // 32bit find first hi bit instruction
- IFFB_LO, // 32bit find first low bit instruction
- DIV_INF, // Divide with infinity returned on zero divisor
- SMAX, // Signed integer max
- CMP,
- IL_CC_I_GT,
- IL_CC_I_LT,
- IL_CC_I_GE,
- IL_CC_I_LE,
- IL_CC_I_EQ,
- IL_CC_I_NE,
+ DP_TO_FP, // Conversion from 64bit FP to 32bit FP
+ FP_TO_DP, // Conversion from 32bit FP to 64bit FP
+ BITCONV, // instruction that converts from any type to any type
+ ADD, // 32/64 bit pseudo instruction
+ VBUILD, // scalar to vector mov instruction
+ VEXTRACT, // extract vector components
+ VINSERT, // insert vector components
+ VCONCAT, // concat a single vector to another vector
+ CALL, // Function call based on a single integer
+ RET, // Return from a function call
+ SELECT_CC, // Select the correct conditional instruction
+ LCREATE, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO, // Get the lo 32 bits from a 64 bit integer
+ DCREATE, // Create a 64bit float from two 32 bit integers
+ DCOMPHI, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO, // Get the lo 32 bits from a 64 bit float
+ LCREATE2, // Create a 64bit integer from two 32 bit integers
+ LCOMPHI2, // Get the hi 32 bits from a 64 bit integer
+ LCOMPLO2, // Get the lo 32 bits from a 64 bit integer
+ DCREATE2, // Create a 64bit float from two 32 bit integers
+ DCOMPHI2, // Get the hi 32 bits from a 64 bit float
+ DCOMPLO2, // Get the lo 32 bits from a 64 bit float
+ UMUL, // 32bit unsigned multiplication
RET_FLAG,
BRANCH_COND,
- LOOP_NZERO,
- LOOP_ZERO,
- LOOP_CMP,
ADDADDR,
// ATOMIC Operations
// Global Memory
@@ -110,6 +71,8 @@
ATOM_G_RSUB,
ATOM_G_XCHG,
ATOM_G_XOR,
+ ATOM_G_STORE,
+ ATOM_G_LOAD,
ATOM_G_ADD_NORET,
ATOM_G_AND_NORET,
ATOM_G_CMPXCHG_NORET,
@@ -215,7 +178,7 @@
LAST_ISD_NUMBER
};
-} // AMDILISD
+} // AMDILISD
class MachineBasicBlock;
class MachineInstr;
@@ -225,7 +188,7 @@
class AMDILTargetLowering : public TargetLowering
{
private:
- int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
public:
AMDILTargetLowering(TargetMachine &TM);
@@ -251,12 +214,7 @@
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0
- ) const;
-
- virtual MachineBasicBlock*
- EmitInstrWithCustomInserter(
- MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ ) const;
virtual bool
getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -264,7 +222,7 @@
virtual const char*
getTargetNodeName(
unsigned Opcode
- ) const;
+ ) const;
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
virtual EVT getSetCCResultType(EVT VT) const;
@@ -359,12 +317,13 @@
SDValue
genCLZu32(SDValue Op, SelectionDAG &DAG) const;
SDValue
- genf64toi32(SDValue Op, SelectionDAG &DAG,
- bool includeSign) const;
-
+ genf64toi32(SDValue Op, SelectionDAG &DAG, bool includeSign) const;
SDValue
- genf64toi64(SDValue Op, SelectionDAG &DAG,
- bool includeSign) const;
+ genf64toi64(SDValue Op, SelectionDAG &DAG, bool includeSign) const;
+ SDValue
+ genf32toi64(SDValue Op, SelectionDAG &DAG, bool includeSign) const;
+ SDValue
+ geni64tof32(SDValue Op, SelectionDAG &DAG, bool includeSign) const;
SDValue
genu32tof64(SDValue Op, EVT dblvt, SelectionDAG &DAG) const;
@@ -480,10 +439,7 @@
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue
- LowerAND(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue
- LowerOR(SDValue Op, SelectionDAG &DAG) const;
+ LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue
LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -492,9 +448,6 @@
LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue
- LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue
LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
EVT
@@ -513,51 +466,11 @@
LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue
LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- void
- generateCMPInstr(MachineInstr*, MachineBasicBlock*,
- const TargetInstrInfo&) const;
- MachineOperand
- convertToReg(MachineOperand) const;
-
- // private members used by the set of instruction generation
- // functions, these are marked mutable as they are cached so
- // that they don't have to constantly be looked up when using the
- // generateMachineInst/genVReg instructions. This is to simplify
- // the code
- // and to make it cleaner. The object itself doesn't change as
- // only these functions use these three data types.
- mutable MachineBasicBlock *mBB;
- mutable DebugLoc *mDL;
- mutable const TargetInstrInfo *mTII;
- mutable MachineBasicBlock::iterator mBBI;
- void
- setPrivateData(MachineBasicBlock *BB,
- MachineBasicBlock::iterator &BBI,
- DebugLoc *DL,
- const TargetInstrInfo *TII) const;
- uint32_t genVReg(uint32_t regType) const;
- MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst) const;
- MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst, uint32_t src1) const;
- MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst, uint32_t src1, uint32_t src2) const;
- MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst, uint32_t src1, uint32_t src2,
- uint32_t src3) const;
uint32_t
addExtensionInstructions(
uint32_t reg, bool signedShift,
unsigned int simpleVT) const;
- void
- generateLongRelational(MachineInstr *MI,
- unsigned int opCode) const;
-
-}; // AMDILTargetLowering
+}; // AMDILTargetLowering
} // end namespace llvm
#endif // AMDIL_ISELLOWERING_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILImageExpansion.cpp Wed Sep 12 12:43:34 2012
@@ -23,15 +23,7 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
-AMDILImageExpansion::AMDILImageExpansion(TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : AMDIL789IOExpansion(tm, OptLevel)
-{
-}
-
-AMDILImageExpansion::~AMDILImageExpansion()
-{
-}
-void AMDILImageExpansion::expandInefficientImageLoad(
+void AMDILImageExpansionImpl::expandInefficientImageLoad(
MachineBasicBlock *mBB, MachineInstr *MI)
{
#if 0
@@ -69,7 +61,7 @@
if (!rID) {
O << "\tdefault\n";
} else {
- O << "\tcase " << rID << "\n" ;
+ O << "\tcase " << rID << "\n";
}
O << "\tswitch " << mASM->getRegisterName(MI->getOperand(2).getReg())
<< "\n";
@@ -79,7 +71,7 @@
if (!sID) {
O << "\tdefault\n";
} else {
- O << "\tcase " << sID << "\n" ;
+ O << "\tcase " << sID << "\n";
}
}
if (internalSampler) {
@@ -103,30 +95,30 @@
<< "\tendif\n";
} else {
O << "\tiadd " << tReg1 << ".y, " << tReg1 << ".x, l0.y\n"
- // Check if sampler has normalized setting.
+ // Check if sampler has normalized setting.
<< "\tand r0, " << tReg2 << ".x, l0.y\n"
- // Convert image dimensions to float.
+ // Convert image dimensions to float.
<< "\titof " << tReg4 << ", cb1[" << tReg1 << ".x].xyz\n"
- // Move into R0 1 if unnormalized or dimensions if normalized.
+ // Move into R0 1 if unnormalized or dimensions if normalized.
<< "\tcmov_logical r0, r0, " << tReg4 << ", r1.1111\n"
- // Make coordinates unnormalized.
+ // Make coordinates unnormalized.
<< "\tmul " << tReg3 << ", r0, " << tReg3 << "\n"
- // Get linear filtering if set.
+ // Get linear filtering if set.
<< "\tand " << tReg4 << ", " << tReg2 << ".x, l6.x\n"
- // Save unnormalized coordinates in R0.
+ // Save unnormalized coordinates in R0.
<< "\tmov r0, " << tReg3 << "\n"
- // Floor the coordinates due to HW incompatibility with precision
- // requirements.
- << "\tflr " << tReg3 << ", " << tReg3 << "\n"
- // get Origianl coordinates (without floor) if linear filtering
- << "\tcmov_logical " << tReg3 << ", " << tReg4
- << ".xxxx, r0, " << tReg3 << "\n"
- // Normalize the coordinates with multiplying by 1/dimensions
- << "\tmul " << tReg3 << ", " << tReg3 << ", cb1["
- << tReg1 << ".y].xyz\n"
- << "\tsample_resource(" << rID << ")_sampler("
- << sID << ")_coordtype(normalized) "
- << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n";
+ // Floor the coordinates due to HW incompatibility with precision
+ // requirements.
+ << "\tflr " << tReg3 << ", " << tReg3 << "\n"
+ // get Origianl coordinates (without floor) if linear filtering
+ << "\tcmov_logical " << tReg3 << ", " << tReg4
+ << ".xxxx, r0, " << tReg3 << "\n"
+ // Normalize the coordinates with multiplying by 1/dimensions
+ << "\tmul " << tReg3 << ", " << tReg3 << ", cb1["
+ << tReg1 << ".y].xyz\n"
+ << "\tsample_resource(" << rID << ")_sampler("
+ << sID << ")_coordtype(normalized) "
+ << tReg1 << ", " << tReg3 << " ; " << name.data() << "\n";
}
if (SamplerCount - 1) {
O << "\tbreak\n";
@@ -145,14 +137,16 @@
#endif
}
void
-AMDILImageExpansion::expandImageLoad(MachineBasicBlock *mBB, MachineInstr *MI)
+AMDILImageExpansionImpl::expandImageLoad(MachineBasicBlock *mBB,
+ MachineInstr *MI)
{
uint32_t imageID = getPointerID(MI);
MI->getOperand(1).ChangeToImmediate(imageID);
saveInst = true;
}
void
-AMDILImageExpansion::expandImageStore(MachineBasicBlock *mBB, MachineInstr *MI)
+AMDILImageExpansionImpl::expandImageStore(MachineBasicBlock *mBB,
+ MachineInstr *MI)
{
uint32_t imageID = getPointerID(MI);
mKM->setOutputInst();
@@ -160,11 +154,12 @@
saveInst = true;
}
void
-AMDILImageExpansion::expandImageParam(MachineBasicBlock *mBB, MachineInstr *MI)
+AMDILImageExpansionImpl::expandImageParam(MachineBasicBlock *mBB,
+ MachineInstr *MI)
{
uint32_t ID = getPointerID(MI);
DebugLoc DL = MI->getDebugLoc();
- BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CBLOAD),
+ BuildMI(*mBB, MI, DL, mTII->get(AMDIL::CB32LOAD),
MI->getOperand(0).getReg())
.addImm(ID)
.addImm(1);
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInliner.cpp Wed Sep 12 12:43:34 2012
@@ -36,7 +36,7 @@
namespace
{
-class LLVM_LIBRARY_VISIBILITY AMDILInlinePass: public FunctionPass
+class LLVM_LIBRARY_VISIBILITY AMDILInlinePass : public FunctionPass
{
public:
@@ -51,7 +51,8 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
private:
typedef DenseMap<const ArrayType*, SmallVector<AllocaInst*,
- DEFAULT_VEC_SLOTS> > InlinedArrayAllocasTy;
+ DEFAULT_VEC_SLOTS> >
+ InlinedArrayAllocasTy;
bool
AMDILInlineCallIfPossible(CallSite CS,
const TargetData *TD,
@@ -61,7 +62,6 @@
char AMDILInlinePass::ID = 0;
} // anonymouse namespace
-
namespace llvm
{
FunctionPass*
@@ -79,12 +79,12 @@
AMDILInlinePass::~AMDILInlinePass()
{
}
-
-
bool
-AMDILInlinePass::AMDILInlineCallIfPossible(CallSite CS,
- const TargetData *TD, InlinedArrayAllocasTy &InlinedArrayAllocas)
-{
+AMDILInlinePass::AMDILInlineCallIfPossible(
+ CallSite CS,
+ const TargetData *TD,
+ InlinedArrayAllocasTy &
+ InlinedArrayAllocas) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -95,7 +95,7 @@
if (!InlineFunction(CS, IFI))
return false;
DEBUG(errs() << "<amdilinline> function " << Caller->getName()
- << ": inlined call to "<< Callee->getName() << "\n");
+ << ": inlined call to "<< Callee->getName() << "\n");
// If the inlined function had a higher stack protection level than the
// calling function, then bump up the caller's stack protection level.
@@ -105,7 +105,6 @@
!Caller->hasFnAttr(Attribute::StackProtectReq))
Caller->addFnAttr(Attribute::StackProtect);
-
// Look at all of the allocas that we inlined through this call site. If we
// have already inlined other allocas through other calls into this function,
// then we know that they have disjoint lifetimes and that we can merge them.
@@ -135,7 +134,6 @@
for (unsigned AllocaNo = 0,
e = IFI.StaticAllocas.size();
AllocaNo != e; ++AllocaNo) {
-
AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
// Don't bother trying to merge array allocations (they will usually be
@@ -147,7 +145,7 @@
// Get the list of all available allocas for this array type.
SmallVector<AllocaInst*, DEFAULT_VEC_SLOTS> &AllocasForType
- = InlinedArrayAllocas[ATy];
+ = InlinedArrayAllocas[ATy];
// Loop over the allocas in AllocasForType to see if we can reuse one. Note
// that we have to be careful not to reuse the same "available" alloca for
@@ -193,7 +191,6 @@
return true;
}
-
bool
AMDILInlinePass::runOnFunction(Function &MF)
{
@@ -219,7 +216,8 @@
continue;
// We don't want to inline if we are recursive.
- if (CS.getCalledFunction() && CS.getCalledFunction()->getName() == MF.getName()) {
+ if (CS.getCalledFunction() && CS.getCalledFunction()->getName() ==
+ MF.getName()) {
AMDILMachineFunctionInfo *MFI =
getAnalysis<MachineFunctionAnalysis>().getMF()
.getInfo<AMDILMachineFunctionInfo>();
@@ -248,7 +246,6 @@
}
return Changed;
}
-
const char*
AMDILInlinePass::getPassName() const
{
@@ -259,13 +256,11 @@
{
return false;
}
-
bool
AMDILInlinePass::doFinalization(Module &M)
{
return false;
}
-
void
AMDILInlinePass::getAnalysisUsage(AnalysisUsage &AU) const
{
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.cpp Wed Sep 12 12:43:34 2012
@@ -27,7 +27,6 @@
{
llvm_unreachable("unsupported");
}
-
AMDILInstPrinter::~AMDILInstPrinter()
{
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstPrinter.h Wed Sep 12 12:43:34 2012
@@ -14,24 +14,25 @@
#ifndef AMDILMINSTPRINTER_H_
#define AMDILMINSTPRINTER_H_
#include "AMDILLLVMVersion.h"
+
#include "AMDILLLVMPC.h"
+
#include "llvm/MC/MCInstPrinter.h"
-namespace llvm
-{
+namespace llvm {
class MCAsmInfo;
class MCInst;
class raw_ostream;
// FIXME: We will need to implement this class when we transition to use
// MCStreamer.
-class AMDILInstPrinter : public MCInstPrinter
-{
+class AMDILInstPrinter : public MCInstPrinter {
public:
virtual ~AMDILInstPrinter();
- AMDILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI);
+ AMDILInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI);
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef annot);
};
-
} // namespace llvm
#endif // AMDILMINSTPRINTER_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.cpp Wed Sep 12 12:43:34 2012
@@ -28,26 +28,21 @@
AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm)
: AMDILGenInstrInfo(AMDIL::ADJCALLSTACKDOWN, AMDIL::ADJCALLSTACKUP),
RI(tm, *this),
- TM(tm)
-{
+ TM(tm) {
}
-
-const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const
-{
+const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const {
return RI;
}
-
/// Return true if the instruction is a register to register move and leave the
/// source and dest operands in the passed parameters.
bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg,
unsigned int &DstReg, unsigned int &SrcSubIdx,
- unsigned int &DstSubIdx) const
-{
+ unsigned int &DstSubIdx) const {
// FIXME: we should look for:
// add with 0
//assert(0 && "is Move Instruction has not been implemented yet!");
//return true;
- if (!isMove(MI.getOpcode())) {
+ if (MI.getOpcode() == TargetOpcode::COPY) {
return false;
}
if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) {
@@ -59,19 +54,17 @@
SrcSubIdx = 0;
return true;
}
-
bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const
-{
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
return false;
unsigned opc = MI.getOpcode();
SubIdx = llvm::NoSubRegister;
switch (opc) {
default:
return false;
- case AMDIL::DHI:
- case AMDIL::LHI:
+ case AMDIL::DHIf64r:
+ case AMDIL::LHIi64r:
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
@@ -79,8 +72,8 @@
DstReg = MI.getOperand(0).getReg();
SubIdx = llvm::sub_y_comp;
break;
- case AMDIL::DLO:
- case AMDIL::LLO:
+ case AMDIL::DLOf64r:
+ case AMDIL::LLOi64r:
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
@@ -88,8 +81,8 @@
DstReg = MI.getOperand(0).getReg();
SubIdx = llvm::sub_x_comp;
break;
- case AMDIL::VEXTRACT_v2f64:
- case AMDIL::VEXTRACT_v2i64:
+ case AMDIL::VEXTRACTv2f64r:
+ case AMDIL::VEXTRACTv2i64r:
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
@@ -107,14 +100,14 @@
default:
return false;
};
- case AMDIL::VEXTRACT_v2f32:
- case AMDIL::VEXTRACT_v2i32:
- case AMDIL::VEXTRACT_v2i16:
- case AMDIL::VEXTRACT_v2i8:
- case AMDIL::VEXTRACT_v4f32:
- case AMDIL::VEXTRACT_v4i32:
- case AMDIL::VEXTRACT_v4i16:
- case AMDIL::VEXTRACT_v4i8:
+ case AMDIL::VEXTRACTv2f32r:
+ case AMDIL::VEXTRACTv2i32r:
+ case AMDIL::VEXTRACTv2i16r:
+ case AMDIL::VEXTRACTv2i8r:
+ case AMDIL::VEXTRACTv4f32r:
+ case AMDIL::VEXTRACTv4i32r:
+ case AMDIL::VEXTRACTv4i16r:
+ case AMDIL::VEXTRACTv4i8r:
if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
// Be conservative.
return false;
@@ -141,31 +134,25 @@
};
return SubIdx != llvm::NoSubRegister;
}
-
unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const
-{
- if (isPrivateInst(TM, MI) && isLoadInst(TM, MI) && MI->getOperand(1).isFI()) {
+ int &FrameIndex) const {
+ if (isPrivateInst(MI) && isPtrLoadInst(MI) && MI->getOperand(1).isFI()) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
return 0;
}
-
unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const
-{
- if (isPrivateInst(TM, MI) && isLoadInst(TM, MI) && MI->getOperand(1).isFI()) {
+ int &FrameIndex) const {
+ if (isPrivateInst(MI) && isPtrLoadInst(MI) && MI->getOperand(1).isFI()) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
return 0;
}
-
bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const
-{
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
o != oe;
@@ -181,18 +168,16 @@
return false;
}
unsigned AMDILInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const
-{
- if (isPrivateInst(TM, MI) && isStoreInst(TM, MI) && MI->getOperand(1).isFI()) {
+ int &FrameIndex) const {
+ if (isPrivateInst(MI) && isPtrStoreInst(MI) && MI->getOperand(1).isFI()) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
return 0;
}
unsigned AMDILInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const
-{
- if (isPrivateInst(TM, MI) && isStoreInst(TM, MI) && MI->getOperand(1).isFI()) {
+ int &FrameIndex) const {
+ if (isPrivateInst(MI) && isPtrStoreInst(MI) && MI->getOperand(1).isFI()) {
unsigned Reg;
if ((Reg = isStoreToStackSlot(MI, FrameIndex))) {
return Reg;
@@ -203,9 +188,8 @@
return 0;
}
bool AMDILInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
- const MachineMemOperand *&MMO,
- int &FrameIndex) const
-{
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
o != oe;
@@ -220,38 +204,30 @@
}
return false;
}
-
void
AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo &TRI) const
-{
+ const TargetRegisterInfo &TRI) const {
// TODO: Implement this function
}
-
MachineInstr*
AMDILInstrInfo::duplicate(MachineInstr *Orig,
- MachineFunction &MF) const
-{
+ MachineFunction &MF) const {
// TODO: Implement this function
return MF.CloneMachineInstr(Orig);
}
-
MachineInstr *
AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const
-{
+ LiveVariables *LV) const {
// TODO: Implement this function
return NULL;
}
-
MachineInstr*
AMDILInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI) const
-{
+ bool NewMI) const {
// TODO: Implement this function
return NULL;
}
@@ -270,29 +246,30 @@
// TODO: Implement this function
return false;
}
-
bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
- MachineBasicBlock &MBB) const
-{
+ MachineBasicBlock &MBB) const {
while (iter != MBB.end()) {
switch (iter->getOpcode()) {
default:
break;
- ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
- case AMDIL::BRANCH:
+ case AMDIL::BRANCHf64br:
+ case AMDIL::BRANCHf32br:
+ case AMDIL::BRANCHi64br:
+ case AMDIL::BRANCHi32br:
+ case AMDIL::BRANCHi16br:
+ case AMDIL::BRANCHi8br:
+ case AMDIL::BRANCHb:
return true;
};
++iter;
}
return false;
}
-
bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
+ bool AllowModify) const {
bool retVal = true;
return retVal;
MachineBasicBlock::iterator iter = MBB.begin();
@@ -301,7 +278,7 @@
} else {
MachineInstr *firstBranch = iter;
if (!getNextBranchInstr(++iter, MBB)) {
- if (firstBranch->getOpcode() == AMDIL::BRANCH) {
+ if (firstBranch->getOpcode() == AMDIL::BRANCHb) {
TBB = firstBranch->getOperand(0).getMBB();
firstBranch->eraseFromParent();
retVal = false;
@@ -317,7 +294,7 @@
} else {
MachineInstr *secondBranch = iter;
if (!getNextBranchInstr(++iter, MBB)) {
- if (secondBranch->getOpcode() == AMDIL::BRANCH) {
+ if (secondBranch->getOpcode() == AMDIL::BRANCHb) {
TBB = firstBranch->getOperand(0).getMBB();
Cond.push_back(firstBranch->getOperand(1));
FBB = secondBranch->getOperand(0).getMBB();
@@ -336,28 +313,19 @@
}
return retVal;
}
-
-unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const
-{
+unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const {
const MachineInstr *MI = op.getParent();
switch (MI->getDesc().OpInfo->RegClass) {
- default: // FIXME: fallthrough??
- case AMDIL::GPRI8RegClassID:
- return AMDIL::BRANCH_COND_i8;
- case AMDIL::GPRI16RegClassID:
- return AMDIL::BRANCH_COND_i16;
- case AMDIL::GPRI32RegClassID:
- return AMDIL::BRANCH_COND_i32;
- case AMDIL::GPRI64RegClassID:
- return AMDIL::BRANCH_COND_i64;
- case AMDIL::GPRF32RegClassID:
- return AMDIL::BRANCH_COND_f32;
- case AMDIL::GPRF64RegClassID:
- return AMDIL::BRANCH_COND_f64;
+ default: // FIXME: fallthrough??
+ case AMDIL::GPRI8RegClassID: return AMDIL::BRANCHi8br;
+ case AMDIL::GPRI16RegClassID: return AMDIL::BRANCHi16br;
+ case AMDIL::GPRI32RegClassID: return AMDIL::BRANCHi32br;
+ case AMDIL::GPRI64RegClassID: return AMDIL::BRANCHi64br;
+ case AMDIL::GPRF32RegClassID: return AMDIL::BRANCHf32br;
+ case AMDIL::GPRF64RegClassID: return AMDIL::BRANCHf64br;
};
}
-
unsigned int
AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
@@ -371,7 +339,7 @@
}
if (FBB == 0) {
if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(AMDIL::BRANCHb)).addMBB(TBB);
} else {
BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
.addMBB(TBB).addReg(Cond[0].getReg());
@@ -380,14 +348,12 @@
} else {
BuildMI(&MBB, DL, get(getBranchInstr(Cond[0])))
.addMBB(TBB).addReg(Cond[0].getReg());
- BuildMI(&MBB, DL, get(AMDIL::BRANCH)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(AMDIL::BRANCHb)).addMBB(FBB);
}
assert(0 && "Inserting two branches not supported");
return 0;
}
-
-unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
-{
+unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) {
return 0;
@@ -396,8 +362,13 @@
switch (I->getOpcode()) {
default:
return 0;
- ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
- case AMDIL::BRANCH:
+ case AMDIL::BRANCHf64br:
+ case AMDIL::BRANCHf32br:
+ case AMDIL::BRANCHi64br:
+ case AMDIL::BRANCHi32br:
+ case AMDIL::BRANCHi16br:
+ case AMDIL::BRANCHi8br:
+ case AMDIL::BRANCHb:
I->eraseFromParent();
break;
}
@@ -408,18 +379,21 @@
}
--I;
switch (I->getOpcode()) {
- // FIXME: only one case??
+ // FIXME: only one case??
default:
return 1;
- ExpandCaseToAllScalarTypes(AMDIL::BRANCH_COND);
+ case AMDIL::BRANCHf64br:
+ case AMDIL::BRANCHf32br:
+ case AMDIL::BRANCHi64br:
+ case AMDIL::BRANCHi32br:
+ case AMDIL::BRANCHi16br:
+ case AMDIL::BRANCHi8br:
I->eraseFromParent();
break;
}
return 2;
}
-
-MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB)
-{
+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator tmp = MBB->end();
if (!MBB->size()) {
return MBB->end();
@@ -439,23 +413,20 @@
}
return MBB->end();
}
-
bool
AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
- DebugLoc DL) const
-{
+ DebugLoc DL) const {
// If we are adding to the end of a basic block we can safely assume that the
// move is caused by a PHI node since all move instructions that are non-PHI
// have already been inserted into the basic blocks Therefor we call the skip
// flow control instruction to move the iterator before the flow control
// instructions and put the move instruction there.
bool phi = (DestReg < 1025) || (SrcReg < 1025);
- int movInst = phi ? getMoveInstFromID(DestRC->getID())
- : getPHIMoveInstFromID(DestRC->getID());
+ int movInst = TargetOpcode::COPY;
MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB)
: I;
@@ -484,12 +455,12 @@
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const
{
- BuildMI(MBB, MI, DL, get(AMDIL::MOVE_v4i32), DestReg)
+ BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
#if 0
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
- << " to " << RI.getName(DestReg) << '\n');
+ << " to " << RI.getName(DestReg) << '\n');
llvm_unreachable("Cannot emit physreg copy instruction");
#endif
}
@@ -499,8 +470,7 @@
unsigned SrcReg, bool isKill,
int FrameIndex,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+ const TargetRegisterInfo *TRI) const {
unsigned int Opc = 0;
MachineFunction &MF = *(MBB.getParent());
MachineFrameInfo &MFI = *MF.getFrameInfo();
@@ -508,64 +478,64 @@
DebugLoc DL;
switch (RC->getID()) {
default:
- Opc = AMDIL::PRIVATESTORE_v4i32;
+ Opc = AMDIL::PRIVATESTOREv4i32r;
break;
case AMDIL::GPRF32RegClassID:
- Opc = AMDIL::PRIVATESTORE_f32;
+ Opc = AMDIL::PRIVATESTOREf32r;
break;
case AMDIL::GPRF64RegClassID:
- Opc = AMDIL::PRIVATESTORE_f64;
+ Opc = AMDIL::PRIVATESTOREf64r;
break;
case AMDIL::GPRI16RegClassID:
- Opc = AMDIL::PRIVATESTORE_i16;
+ Opc = AMDIL::PRIVATESTOREi16r;
break;
case AMDIL::GPRI32RegClassID:
- Opc = AMDIL::PRIVATESTORE_i32;
+ Opc = AMDIL::PRIVATESTOREi32r;
break;
case AMDIL::GPRI8RegClassID:
- Opc = AMDIL::PRIVATESTORE_i8;
+ Opc = AMDIL::PRIVATESTOREi8r;
break;
case AMDIL::GPRI64RegClassID:
- Opc = AMDIL::PRIVATESTORE_i64;
+ Opc = AMDIL::PRIVATESTOREi64r;
break;
case AMDIL::GPRV2F32RegClassID:
- Opc = AMDIL::PRIVATESTORE_v2f32;
+ Opc = AMDIL::PRIVATESTOREv2f32r;
break;
case AMDIL::GPRV2F64RegClassID:
- Opc = AMDIL::PRIVATESTORE_v2f64;
+ Opc = AMDIL::PRIVATESTOREv2f64r;
break;
case AMDIL::GPRV2I16RegClassID:
- Opc = AMDIL::PRIVATESTORE_v2i16;
+ Opc = AMDIL::PRIVATESTOREv2i16r;
break;
case AMDIL::GPRV2I32RegClassID:
- Opc = AMDIL::PRIVATESTORE_v2i32;
+ Opc = AMDIL::PRIVATESTOREv2i32r;
break;
case AMDIL::GPRV2I8RegClassID:
- Opc = AMDIL::PRIVATESTORE_v2i8;
+ Opc = AMDIL::PRIVATESTOREv2i8r;
break;
case AMDIL::GPRV2I64RegClassID:
- Opc = AMDIL::PRIVATESTORE_v2i64;
+ Opc = AMDIL::PRIVATESTOREv2i64r;
break;
case AMDIL::GPRV4F32RegClassID:
- Opc = AMDIL::PRIVATESTORE_v4f32;
+ Opc = AMDIL::PRIVATESTOREv4f32r;
break;
case AMDIL::GPRV4I16RegClassID:
- Opc = AMDIL::PRIVATESTORE_v4i16;
+ Opc = AMDIL::PRIVATESTOREv4i16r;
break;
case AMDIL::GPRV4I32RegClassID:
- Opc = AMDIL::PRIVATESTORE_v4i32;
+ Opc = AMDIL::PRIVATESTOREv4i32r;
break;
case AMDIL::GPRV4I8RegClassID:
- Opc = AMDIL::PRIVATESTORE_v4i8;
+ Opc = AMDIL::PRIVATESTOREv4i8r;
break;
}
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineMemOperand *MMO =
new MachineMemOperand(
- MachinePointerInfo::getFixedStack(FrameIndex),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachinePointerInfo::getFixedStack(FrameIndex),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
if (MI != MBB.end()) {
DL = MI->getDebugLoc();
}
@@ -578,81 +548,79 @@
mfinfo->setUsesScratch();
AMDILAS::InstrResEnc curRes;
curRes.bits.ResourceID
- = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
setAsmPrinterFlags(nMI, curRes);
}
-
void
AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
-{
+ const TargetRegisterInfo *TRI) const {
unsigned int Opc = 0;
MachineFunction &MF = *(MBB.getParent());
MachineFrameInfo &MFI = *MF.getFrameInfo();
DebugLoc DL;
switch (RC->getID()) {
default:
- Opc = AMDIL::PRIVATELOAD_v4i32;
+ Opc = AMDIL::PRIVATELOADv4i32r;
break;
case AMDIL::GPRF32RegClassID:
- Opc = AMDIL::PRIVATELOAD_f32;
+ Opc = AMDIL::PRIVATELOADf32r;
break;
case AMDIL::GPRF64RegClassID:
- Opc = AMDIL::PRIVATELOAD_f64;
+ Opc = AMDIL::PRIVATELOADf64r;
break;
case AMDIL::GPRI16RegClassID:
- Opc = AMDIL::PRIVATELOAD_i16;
+ Opc = AMDIL::PRIVATELOADi16r;
break;
case AMDIL::GPRI32RegClassID:
- Opc = AMDIL::PRIVATELOAD_i32;
+ Opc = AMDIL::PRIVATELOADi32r;
break;
case AMDIL::GPRI8RegClassID:
- Opc = AMDIL::PRIVATELOAD_i8;
+ Opc = AMDIL::PRIVATELOADi8r;
break;
case AMDIL::GPRI64RegClassID:
- Opc = AMDIL::PRIVATELOAD_i64;
+ Opc = AMDIL::PRIVATELOADi64r;
break;
case AMDIL::GPRV2F32RegClassID:
- Opc = AMDIL::PRIVATELOAD_v2f32;
+ Opc = AMDIL::PRIVATELOADv2f32r;
break;
case AMDIL::GPRV2F64RegClassID:
- Opc = AMDIL::PRIVATELOAD_v2f64;
+ Opc = AMDIL::PRIVATELOADv2f64r;
break;
case AMDIL::GPRV2I16RegClassID:
- Opc = AMDIL::PRIVATELOAD_v2i16;
+ Opc = AMDIL::PRIVATELOADv2i16r;
break;
case AMDIL::GPRV2I32RegClassID:
- Opc = AMDIL::PRIVATELOAD_v2i32;
+ Opc = AMDIL::PRIVATELOADv2i32r;
break;
case AMDIL::GPRV2I8RegClassID:
- Opc = AMDIL::PRIVATELOAD_v2i8;
+ Opc = AMDIL::PRIVATELOADv2i8r;
break;
case AMDIL::GPRV2I64RegClassID:
- Opc = AMDIL::PRIVATELOAD_v2i64;
+ Opc = AMDIL::PRIVATELOADv2i64r;
break;
case AMDIL::GPRV4F32RegClassID:
- Opc = AMDIL::PRIVATELOAD_v4f32;
+ Opc = AMDIL::PRIVATELOADv4f32r;
break;
case AMDIL::GPRV4I16RegClassID:
- Opc = AMDIL::PRIVATELOAD_v4i16;
+ Opc = AMDIL::PRIVATELOADv4i16r;
break;
case AMDIL::GPRV4I32RegClassID:
- Opc = AMDIL::PRIVATELOAD_v4i32;
+ Opc = AMDIL::PRIVATELOADv4i32r;
break;
case AMDIL::GPRV4I8RegClassID:
- Opc = AMDIL::PRIVATELOAD_v4i8;
+ Opc = AMDIL::PRIVATELOADv4i8r;
break;
}
MachineMemOperand *MMO =
new MachineMemOperand(
- MachinePointerInfo::getFixedStack(FrameIndex),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachinePointerInfo::getFixedStack(FrameIndex),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
if (MI != MBB.end()) {
DL = MI->getDebugLoc();
}
@@ -665,17 +633,15 @@
.addImm(0);
AMDILAS::InstrResEnc curRes;
curRes.bits.ResourceID
- = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
+ = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID);
setAsmPrinterFlags(nMI, curRes);
-
}
#if 0
MachineInstr *
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const
-{
+ int FrameIndex) const {
// TODO: Implement this function
return 0;
}
@@ -683,8 +649,7 @@
AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const
-{
+ MachineInstr *LoadMI) const {
// TODO: Implement this function
return 0;
}
@@ -693,7 +658,8 @@
#if 0
bool
AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const
+ const SmallVectorImpl<unsigned> &Ops)
+const
{
// TODO: Implement this function
return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
@@ -702,25 +668,21 @@
AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad,
bool UnfoldStore,
- SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
+ SmallVectorImpl<MachineInstr*> &NewMIs)
+const {
// TODO: Implement this function
return false;
}
-
bool
AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode*> &NewNodes) const
-{
+ SmallVectorImpl<SDNode*> &NewNodes) const {
// TODO: Implement this function
return false;
}
-
unsigned
AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex) const
-{
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex) const {
// TODO: Implement this function
return 0;
}
@@ -728,8 +690,7 @@
bool
AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
- int64_t &Offset2) const
-{
+ int64_t &Offset2) const {
if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) {
return false;
}
@@ -783,11 +744,9 @@
}
return false;
}
-
bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const
-{
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
LoadSDNode *LoadSD1 = dyn_cast<LoadSDNode>(Load1);
LoadSDNode *LoadSD2 = dyn_cast<LoadSDNode>(Load2);
if (!LoadSD1 || !LoadSD2) {
@@ -810,9 +769,8 @@
// TODO: Make the loads schedule near if it fits in a cacheline
return (NumLoads < 16 && (Offset2 - Offset1) < 16);
}
-
-bool AMDILInstrInfo::shouldScheduleWithNormalPriority(SDNode* instruction) const
-{
+bool AMDILInstrInfo::shouldScheduleWithNormalPriority(SDNode* instruction)
+const {
if (instruction->isMachineOpcode()) {
unsigned int Opc = instruction->getMachineOpcode();
switch(Opc) {
@@ -821,103 +779,99 @@
case AMDIL::BARRIER_LOCAL:
case AMDIL::BARRIER_GLOBAL:
case AMDIL::BARRIER_REGION:
- case AMDIL::FENCE:
- case AMDIL::FENCE_LOCAL:
- case AMDIL::FENCE_GLOBAL:
- case AMDIL::FENCE_REGION:
- case AMDIL::FENCE_READ_ONLY:
- case AMDIL::FENCE_READ_ONLY_LOCAL:
- case AMDIL::FENCE_READ_ONLY_GLOBAL:
- case AMDIL::FENCE_READ_ONLY_REGION:
- case AMDIL::FENCE_WRITE_ONLY:
- case AMDIL::FENCE_WRITE_ONLY_LOCAL:
- case AMDIL::FENCE_WRITE_ONLY_GLOBAL:
- case AMDIL::FENCE_WRITE_ONLY_REGION:
- return true; // Maybe other instructions will need to be added to this?
+ case AMDIL::FENCEr:
+ case AMDIL::FENCE_Lr:
+ case AMDIL::FENCE_Mr:
+ case AMDIL::FENCE_Gr:
+ case AMDIL::FENCE_LMr:
+ case AMDIL::FENCE_LGr:
+ case AMDIL::FENCE_MGr:
+ case AMDIL::FENCE_ROr:
+ case AMDIL::FENCE_RO_Lr:
+ case AMDIL::FENCE_RO_Mr:
+ case AMDIL::FENCE_RO_Gr:
+ case AMDIL::FENCE_RO_LMr:
+ case AMDIL::FENCE_RO_LGr:
+ case AMDIL::FENCE_RO_MGr:
+ case AMDIL::FENCE_WOr:
+ case AMDIL::FENCE_WO_Lr:
+ case AMDIL::FENCE_WO_Mr:
+ case AMDIL::FENCE_WO_Gr:
+ case AMDIL::FENCE_WO_LMr:
+ case AMDIL::FENCE_WO_LGr:
+ case AMDIL::FENCE_WO_MGr:
+ case AMDIL::FENCE_Sr:
+ case AMDIL::FENCE_MSr:
+ case AMDIL::FENCE_LSr:
+ case AMDIL::FENCE_GSr:
+ case AMDIL::FENCE_LMSr:
+ case AMDIL::FENCE_MGSr:
+ case AMDIL::FENCE_LGSr:
+ return true; // Maybe other instructions will need to be added to this?
default:
return false;
}
}
return false;
}
-
bool
AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
-const
-{
+const {
// TODO: Implement this function
return true;
}
void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const
-{
+ MachineBasicBlock::iterator MI) const {
// TODO: Implement this function
}
-
-bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const
-{
+bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const {
// TODO: Implement this function
return false;
}
-bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const
-{
+bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
// TODO: Implement this function
return false;
}
-
-bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const
-{
+bool AMDILInstrInfo::PredicateInstruction(
+ MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand>
+ &Pred) const {
// TODO: Implement this function
return false;
}
-
bool
AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2)
-const
-{
+const {
// TODO: Implement this function
return false;
}
-
bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const
{
// TODO: Implement this function
return false;
}
-
-bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const
-{
+bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const {
// TODO: Implement this function
return MI->getDesc().isPredicable();
}
-
bool
-AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const
-{
+AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
// TODO: Implement this function
return true;
}
-
-unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const
-{
+unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// TODO: Implement this function
return 0;
}
-
-
unsigned
-AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const
-{
+AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const {
// TODO: Implement this function
return 0;
}
-
unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const
-{
+ const MCAsmInfo &MAI) const {
// TODO: Implement this function
return 0;
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.h Wed Sep 12 12:43:34 2012
@@ -22,14 +22,12 @@
#define GET_INSTRINFO_HEADER
#include "AMDILGenInstrInfo.inc"
-namespace llvm
-{
+namespace llvm {
// AMDIL - This namespace holds all of the target specific flags that
// instruction info tracks.
//
//class AMDILTargetMachine;
-class AMDILInstrInfo : public AMDILGenInstrInfo
-{
+class AMDILInstrInfo : public AMDILGenInstrInfo {
private:
const AMDILRegisterInfo RI;
AMDILTargetMachine &TM;
@@ -66,8 +64,6 @@
const MachineMemOperand *&MMO,
int &FrameIndex) const;
-
-
void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
@@ -90,8 +86,6 @@
const MachineInstr *MI1,
const MachineRegisterInfo *MRI = 0) const;
-
-
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -187,9 +181,7 @@
unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
unsigned getInlineAsmLength(const char *Str,
const MCAsmInfo &MAI) const;
-
};
-
}
#endif // AMDILINSTRINFO_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrInfo.td Wed Sep 12 12:43:34 2012
@@ -14,8 +14,7 @@
def HasHWDDiv : Predicate<"Subtarget->device()"
"->getGeneration() > AMDILDeviceInfo::HD4XXX && "
"Subtarget->device()->usesHardware(AMDILDeviceInfo::DoubleOps) && "
- "(Subtarget->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX ||"
- "Subtarget->calVersion() < CAL_VERSION_SC_155)">;
+ "Subtarget->device()->getGeneration() <= AMDILDeviceInfo::HD6XXX">;
// Predicate that is set to true if the hardware supports double, but not double
// precision divide in hardware
@@ -48,13 +47,12 @@
// Predicate that is set to true if 64bit Mul is supported in the IL or not
-def HasHW64Mul : Predicate<"Subtarget->calVersion()"
- ">= CAL_VERSION_SC_139"
- "&& Subtarget->device()"
+def HasHW64Mul : Predicate<"Subtarget->device()"
"->getGeneration() >="
"AMDILDeviceInfo::HD5XXX">;
-def HasSW64Mul : Predicate<"Subtarget->calVersion()"
- "< CAL_VERSION_SC_139">;
+def HasSW64Mul : Predicate<"Subtarget->device()"
+ "->getGeneration() <"
+ "AMDILDeviceInfo::HD5XXX">;
// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
def HasHW64DivMod : Predicate<"Subtarget->device()"
"->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">;
@@ -63,22 +61,13 @@
// Predicate that is set to true if BFI/BFM are supported.
-def HasHWBitFieldInst : Predicate<"Subtarget->calVersion()"
- ">= CAL_VERSION_SC_151"
- "&& Subtarget->device()"
+def HasHWBitFieldInst : Predicate<"Subtarget->device()"
"->getGeneration() >="
"AMDILDeviceInfo::HD5XXX">;
-def HasHWDoubleAbs : Predicate<"Subtarget->calVersion()"
- ">= CAL_VERSION_SC_153">;
-def HasSWDoubleAbs : Predicate<"Subtarget->calVersion()"
- "< CAL_VERSION_SC_153">;
-def HasHWDoubleConv : Predicate<"Subtarget->calVersion()"
- ">= CAL_VERSION_SC_155">;
-
def IsEGOrLaterDevice : Predicate<"Subtarget->device()->getGeneration()"
" >= AMDILDeviceInfo::HD5XXX">;
-def HasByteShortUAV : Predicate<"Subtarget->device()"
+def IsSIOrLaterDevice : Predicate<"Subtarget->device()"
"->getGeneration() >= AMDILDeviceInfo::HD7XXX">;
def Has64BitPtr : Predicate<"Subtarget->is64bit()">;
@@ -109,14 +98,14 @@
include "AMDILFormats.td"
//===--------------------------------------------------------------------===//
-// Multiclass Instruction formats
+// Intrinsics support
//===--------------------------------------------------------------------===//
-include "AMDILMultiClass.td"
+include "AMDILIntrinsics.td"
//===--------------------------------------------------------------------===//
-// Intrinsics support
+// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
-include "AMDILIntrinsics.td"
+include "AMDILMultiClass.td"
//===--------------------------------------------------------------------===//
// Instructions support
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstrPatterns.td Wed Sep 12 12:43:34 2012
@@ -11,130 +11,47 @@
//
//===----------------------------------------------------------------------===//
-def : Pat<(i32 (or GPRI32:$src0, GPRI32:$src1)),
- (i32 (BINARY_OR_i32 GPRI32:$src0, GPRI32:$src1))>;
-
-// integer subtraction
-// a - b ==> a + (-b)
-def SUB_i8 : Pat<(sub GPRI8:$src0, GPRI8:$src1),
- (ADD_i8 GPRI8:$src0, (NEGATE_i8 GPRI8:$src1))>;
-def SUB_v2i8 : Pat<(sub GPRV2I8:$src0, GPRV2I8:$src1),
- (ADD_v2i8 GPRV2I8:$src0, (NEGATE_v2i8 GPRV2I8:$src1))>;
-def SUB_v4i8 : Pat<(sub GPRV4I8:$src0, GPRV4I8:$src1),
- (ADD_v4i8 GPRV4I8:$src0, (NEGATE_v4i8 GPRV4I8:$src1))>;
-def SUB_i16 : Pat<(sub GPRI16:$src0, GPRI16:$src1),
- (ADD_i16 GPRI16:$src0, (NEGATE_i16 GPRI16:$src1))>;
-def SUB_v2i16 : Pat<(sub GPRV2I16:$src0, GPRV2I16:$src1),
- (ADD_v2i16 GPRV2I16:$src0, (NEGATE_v2i16 GPRV2I16:$src1))>;
-def SUB_v4i16 : Pat<(sub GPRV4I16:$src0, GPRV4I16:$src1),
- (ADD_v4i16 GPRV4I16:$src0, (NEGATE_v4i16 GPRV4I16:$src1))>;
-def SUB_i32 : Pat<(sub GPRI32:$src0, GPRI32:$src1),
- (ADD_i32 GPRI32:$src0, (NEGATE_i32 GPRI32:$src1))>;
-def SUB_v2i32 : Pat<(sub GPRV2I32:$src0, GPRV2I32:$src1),
- (ADD_v2i32 GPRV2I32:$src0, (NEGATE_v2i32 GPRV2I32:$src1))>;
-def SUB_v4i32 : Pat<(sub GPRV4I32:$src0, GPRV4I32:$src1),
- (ADD_v4i32 GPRV4I32:$src0, (NEGATE_v4i32 GPRV4I32:$src1))>;
-
-// Convert between float -> ulong efficiently
-// static ulong
-// cf2ul(float f)
-// {
-// float fh = f * 0x1.0p-32f;
-// uint uh = (uint)fh;
-// float fuh = (float)uh;
-// float fl = mad(-0x1.0p+32f, fuh, f);
-// uint ul = (uint)fl;
-// return as_ulong((uint2)(ul, uh));
-// }
-def FTOUL_i64 : Pat<(i64 (fp_to_uint GPRF32:$src0)),
- (LCREATE
- (FTOU
- (FMAD_f32
- (IL_ASFLOAT_i32 (LOADCONST_i32 0xcf800000)),
- (UTOF
- (FTOU
- (MUL_IEEE_f32 GPRF32:$src0,
- (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))
- )
- )
- ),
- GPRF32:$src0)
- ),
- (FTOU
- (MUL_IEEE_f32 GPRF32:$src0,
- (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))
- )
- )
- )>;
-
-// static ulong2
-// cf22ul2(float2 f)
-// {
-// float2 fh = f * 0x1.0p-32f;
-// uint2 uh = convert_uint2(fh);
-// float2 fuh = convert_float2(uh);
-// float2 fl = mad(-0x1.0p+32f, fuh, f);
-// uint2 ul = convert_uint2(fl);
-// return as_ulong2((uint4)(ul, uh));
-// }
-def FTOUL_v2i64 : Pat<(v2i64 (fp_to_uint GPRV2F32:$src0)),
- (LCREATE_v2i64
- (FTOU_v2i32
- (FMAD_v2f32
- (VCREATE_v2f32
- (IL_ASFLOAT_i32 (LOADCONST_i32 0xcf800000))),
- (UTOF_v2f32 (FTOU_v2i32
- (MUL_IEEE_v2f32 GPRV2F32:$src0,
- (VCREATE_v2f32
- (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000)))))),
- GPRV2F32:$src0)),
- (FTOU_v2i32 (MUL_IEEE_v2f32 GPRV2F32:$src0,
- (VCREATE_v2f32
- (IL_ASFLOAT_i32 (LOADCONST_i32 0x2f800000))))))>;
+def SUBi8rr : Pat<(sub GPRI8:$src0, GPRI8:$src1),
+ (ADDi8rr GPRI8:$src0, (NEGi8r GPRI8:$src1))>;
+def SUBv2i8rr : Pat<(sub GPRV2I8:$src0, GPRV2I8:$src1),
+ (ADDv2i8rr GPRV2I8:$src0, (NEGv2i8r GPRV2I8:$src1))>;
+def SUBv4i8rr : Pat<(sub GPRV4I8:$src0, GPRV4I8:$src1),
+ (ADDv4i8rr GPRV4I8:$src0, (NEGv4i8r GPRV4I8:$src1))>;
+def SUBi16rr : Pat<(sub GPRI16:$src0, GPRI16:$src1),
+ (ADDi16rr GPRI16:$src0, (NEGi16r GPRI16:$src1))>;
+def SUBv2i16rr : Pat<(sub GPRV2I16:$src0, GPRV2I16:$src1),
+ (ADDv2i16rr GPRV2I16:$src0, (NEGv2i16r GPRV2I16:$src1))>;
+def SUBv4i16rr : Pat<(sub GPRV4I16:$src0, GPRV4I16:$src1),
+ (ADDv4i16rr GPRV4I16:$src0, (NEGv4i16r GPRV4I16:$src1))>;
+def SUBi32rr : Pat<(sub GPRI32:$src0, GPRI32:$src1),
+ (ADDi32rr GPRI32:$src0, (NEGi32r GPRI32:$src1))>;
+def SUBv2i32rr : Pat<(sub GPRV2I32:$src0, GPRV2I32:$src1),
+ (ADDv2i32rr GPRV2I32:$src0, (NEGv2i32r GPRV2I32:$src1))>;
+def SUBv4i32rr : Pat<(sub GPRV4I32:$src0, GPRV4I32:$src1),
+ (ADDv4i32rr GPRV4I32:$src0, (NEGv4i32r GPRV4I32:$src1))>;
// LLVM isn't lowering this correctly, so writing a pattern that
// matches it instead.
def : Pat<(build_vector (f32 fpimm:$src)),
- (VCREATE_v4f32 (LOADCONST_f32 fpimm:$src))>;
+ (VCREATEv4f32r (LOADCONSTf32 fpimm:$src))>;
def : Pat<(build_vector (i32 imm:$src)),
- (VCREATE_v4i32 (LOADCONST_i32 imm:$src))>;
+ (VCREATEv4i32r (LOADCONSTi32 imm:$src))>;
def : Pat<(build_vector (i16 imm:$src)),
- (VCREATE_v4i16 (LOADCONST_i16 imm:$src))>;
+ (VCREATEv4i16r (LOADCONSTi16 imm:$src))>;
def : Pat<(build_vector (i8 imm:$src)),
- (VCREATE_v4i8 (LOADCONST_i8 imm:$src))>;
+ (VCREATEv4i8r (LOADCONSTi8 imm:$src))>;
def : Pat<(build_vector (f64 fpimm:$src)),
- (VCREATE_v2f64 (LOADCONST_f64 fpimm:$src))>;
+ (VCREATEv2f64r (LOADCONSTf64 fpimm:$src))>;
def : Pat<(build_vector (f32 fpimm:$src)),
- (VCREATE_v2f32 (LOADCONST_f32 fpimm:$src))>;
+ (VCREATEv2f32r (LOADCONSTf32 fpimm:$src))>;
def : Pat<(build_vector (i64 imm:$src)),
- (VCREATE_v2i64 (LOADCONST_i64 imm:$src))>;
+ (VCREATEv2i64r (LOADCONSTi64 imm:$src))>;
def : Pat<(build_vector (i32 imm:$src)),
- (VCREATE_v2i32 (LOADCONST_i32 imm:$src))>;
+ (VCREATEv2i32r (LOADCONSTi32 imm:$src))>;
def : Pat<(build_vector (i16 imm:$src)),
- (VCREATE_v2i16 (LOADCONST_i16 imm:$src))>;
+ (VCREATEv2i16r (LOADCONSTi16 imm:$src))>;
def : Pat<(build_vector (i8 imm:$src)),
- (VCREATE_v2i8 (LOADCONST_i8 imm:$src))>;
-
-// Correctly lower shl with 32bit left hand side immediate
-def : Pat<(i32 (shl imm:$src, GPRI64:$shift)),
- (SHL_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
-def : Pat<(i32 (sra imm:$src, GPRI64:$shift)),
- (SHL_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
-def : Pat<(i32 (srl imm:$src, GPRI64:$shift)),
- (SHL_i32 (LOADCONST_i32 imm:$src), (LLO GPRI64:$shift))>;
-def : Pat<(i32 (shl GPRI32:$src, GPRI64:$shift)),
- (SHL_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
-def : Pat<(i32 (sra GPRI32:$src, GPRI64:$shift)),
- (SHL_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
-def : Pat<(i32 (srl GPRI32:$src, GPRI64:$shift)),
- (SHL_i32 GPRI32:$src, (LLO GPRI64:$shift))>;
-// Correctly lower shl with 64bit right hand side immediate
-def : Pat<(i32 (shl GPRI32:$src, (i64 imm:$shift))),
- (SHL_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
-def : Pat<(i32 (sra GPRI32:$src, (i64 imm:$shift))),
- (SHL_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
-def : Pat<(i32 (srl GPRI32:$src, (i64 imm:$shift))),
- (SHL_i32 GPRI32:$src, (LLO (LOADCONST_i64 imm:$shift)))>;
+ (VCREATEv2i8r (LOADCONSTi8 imm:$src))>;
// Calls:
def : Pat<(IL_call tglobaladdr:$dst),
@@ -153,60 +70,24 @@
}]>;
/// Pattern 1: (lhs & bitpat) | (rhs & ~bitpat)
-def bfi_pat1 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+def bfi_pat1 : PatFrag<(ops node:$bitpat, node:$lhs, node:$rhs),
(or
(and node:$lhs, node:$bitpat),
(and node:$rhs, (not node:$bitpat)))>;
-/// Pattern 1b: (lhs & bitpat) | (rhs & ~bitpat)
-/// FIXME: This pattern needs to be removed, but requires cleanup of IL_or
-def bfi_pat1b : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
- (IL_or
- (and node:$lhs, node:$bitpat),
- (and node:$rhs, (not node:$bitpat)))>;
-
/// Pattern 2: (lhs & bitpat) | (rhs & (bitpat ^ -1))
-def bfi_pat2 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+def bfi_pat2 : PatFrag<(ops node:$bitpat, node:$lhs, node:$rhs),
(or
(and node:$lhs, node:$bitpat),
(and node:$rhs,
(xor node:$bitpat, isLoadConstantAllOnes) ))>;
-/// Pattern 2b: (lhs & bitpat) | (rhs & (bitpat ^ -1))
-/// FIXME: This pattern needs to be removed, but requires cleanup of IL_or
-def bfi_pat2b : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
- (IL_or
- (and node:$lhs, node:$bitpat),
- (and node:$rhs,
- (xor node:$bitpat, isLoadConstantAllOnes) ))>;
-
/// Pattern 3: (rhs ^ ((rhs ^ lhs) & bitpat))
-def bfi_pat3 : PatFrag<(ops node:$lhs, node:$rhs, node:$bitpat),
+def bfi_pat3 : PatFrag<(ops node:$bitpat, node:$lhs, node:$rhs),
(xor node:$rhs,
(and (xor node:$rhs, node:$lhs),
node:$bitpat))>;
-/// Bitfield Insert pattern fragments
-def isLoadConstantOne : PatLeaf<(timm),
- [{
- return N->isOne();
- }]>;
-
-def is0x1FConstant : PatLeaf<(timm),
- [{
- return N->getZExtValue() == 0x1F;
- }]>;
-
-def is0x3EConstant : PatLeaf<(timm),
- [{
- return N->getZExtValue() == 0x3E;
- }]>;
-
-def is0x1FConstantOrLess : PatLeaf<(timm),
- [{
- return N->getZExtValue() <= 0x1F;
- }]>;
-
def bitmask_5bits : PatFrag<(ops node:$mask),
(and node:$mask, (i32 0x1f))>;
@@ -223,13 +104,11 @@
(bitmask_5bits node:$offset))>;
let Predicates = [HasHWBitFieldInst] in {
-defm BFI_PAT1A : TernaryPatFragI32<IL_OP_BFI, bfi_pat1>;
-defm BFI_PAT1B : TernaryPatFragI32<IL_OP_BFI, bfi_pat1b>;
-defm BFI_PAT2A : TernaryPatFragI32<IL_OP_BFI, bfi_pat2>;
-defm BFI_PAT2B : TernaryPatFragI32<IL_OP_BFI, bfi_pat2b>;
-defm BFI_PAT3 : TernaryPatFragI32<IL_OP_BFI, bfi_pat3>;
-defm BFM_PAT1A : BinaryPatFragI32<IL_OP_BFM, bfm_pat1>;
-defm BFM_PAT1B : BinaryPatFragI32<IL_OP_BFM, bfm_pat1b>;
+defm BFI_PAT1A : TernaryPatReg<IL_OP_BFI, bfi_pat1, GPRI32, i32>;
+defm BFI_PAT2A : TernaryPatReg<IL_OP_BFI, bfi_pat2, GPRI32, i32>;
+defm BFI_PAT3 : TernaryPatReg<IL_OP_BFI, bfi_pat3, GPRI32, i32>;
+defm BFM_PAT1A : BinaryPatMCi32Scalar<IL_OP_BFM, bfm_pat1>;
+defm BFM_PAT1B : BinaryPatMCi32Scalar<IL_OP_BFM, bfm_pat1b>;
}
//
@@ -259,28 +138,6 @@
return (csrc3 == (32 - csrc2));
}]>;
-def bitalign_1b : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
- (IL_or (shl node:$src0, node:$src3), (srl node:$src1, node:$src2)),
- [{
- SDNode *N_or1 = N->getOperand(1).getNode();
- SDNode *N_src2 = N_or1->getOperand(1).getNode();
- ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
- if (!CN_src2) {
- return false;
- }
-
- SDNode *N_or0 = N->getOperand(0).getNode();
- SDNode *N_src3 = N_or0->getOperand(1).getNode();
- ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
- if (!CN_src3) {
- return false;
- }
-
- uint32_t csrc2 = CN_src2->getZExtValue();
- uint32_t csrc3 = CN_src3->getZExtValue();
- return (csrc3 == (32 - csrc2));
- }]>;
-
def bitalign_2 : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
(or (srl node:$src1, node:$src2), (shl node:$src0, node:$src3)),
[{
@@ -303,28 +160,6 @@
return (csrc3 == (32 - csrc2));
}]>;
-def bitalign_2b : PatFrag<(ops node:$src0, node:$src1, node:$src2, node:$src3),
- (IL_or (srl node:$src1, node:$src2), (shl node:$src0, node:$src3)),
- [{
- SDNode *N_or0 = N->getOperand(0).getNode();
- SDNode *N_src2 = N_or0->getOperand(1).getNode();
- ConstantSDNode* CN_src2 = dyn_cast<ConstantSDNode>(N_src2);
- if (!CN_src2) {
- return false;
- }
-
- SDNode *N_or1 = N->getOperand(1).getNode();
- SDNode *N_src3 = N_or1->getOperand(1).getNode();
- ConstantSDNode* CN_src3 = dyn_cast<ConstantSDNode>(N_src3);
- if (!CN_src3) {
- return false;
- }
-
- uint32_t csrc2 = CN_src2->getZExtValue();
- uint32_t csrc3 = CN_src3->getZExtValue();
- return (csrc3 == (32 - csrc2));
- }]>;
-
// B. src2 is a variable
def bitalign_3 : PatFrag<(ops node:$src0, node:$src1, node:$src2),
@@ -332,35 +167,11 @@
(bitmask_5bits (sub (i32 0), node:$src2))),
(srl node:$src1, (bitmask_5bits node:$src2)))>;
-def bitalign_3b : PatFrag<(ops node:$src0, node:$src1, node:$src2),
- (IL_or (shl node:$src0,
- (bitmask_5bits (sub (i32 0), node:$src2))),
- (srl node:$src1, (bitmask_5bits node:$src2)))>;
-
-// TODO: Using FourInOneOut requires four inputs, but bitalign is actually
-// three inputs... Need to improve this.
-multiclass BitAlignPatFragCI32<ILOpCode opc, PatFrag node> {
- def _i32 : FourInOneOut<opc, (outs GPRI32:$dst),
- (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3),
- !strconcat(opc.Text, " $dst, $src0, $src1, $src2"),
- [(set GPRI32:$dst, (node GPRI32:$src0, GPRI32:$src1, GPRI32:$src2, GPRI32:$src3))]>;
-}
-
-multiclass BitAlignPatFragSI32<ILOpCode opc, PatFrag node> {
- def _i32 : ThreeInOneOut<opc, (outs GPRI32:$dst),
- (ins GPRI32:$src0, GPRI32:$src1, GPRI32:$src2),
- !strconcat(opc.Text, " $dst, $src0, $src1, $src2"),
- [(set GPRI32:$dst, (node GPRI32:$src0, GPRI32:$src1, GPRI32:$src2))]>;
-}
-
// Do bitalign pattern recognition if device is EG or later.
let Predicates = [IsEGOrLaterDevice] in {
defm BITALIGN_PAT_1 : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_1>;
-defm BITALIGN_PAT_1B : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_1b>;
defm BITALIGN_PAT_2 : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_2>;
-defm BITALIGN_PAT_2B : BitAlignPatFragCI32<IL_OP_BIT_ALIGN, bitalign_2b>;
-defm BITALIGN_PAT_3 : BitAlignPatFragSI32<IL_OP_BIT_ALIGN, bitalign_3>;
-defm BITALIGN_PAT_3B : BitAlignPatFragSI32<IL_OP_BIT_ALIGN, bitalign_3b>;
+defm BITALIGN_PAT_3 : TernaryPatMCi32Scalar<IL_OP_BIT_ALIGN, bitalign_3>;
}
// unpack[0-3] dst, src
@@ -381,13 +192,6 @@
def unpack3_1 : PatFrag<(ops node:$src),
(uint_to_fp (i32 (int_AMDIL_bit_extract_u32 (i32 8), (i32 24), node:$src)))>;
-multiclass UnpackPatFrag<ILOpCode opc, PatFrag node> {
- def _i32 : OneInOneOut<opc, (outs GPRF32:$dst),
- (ins GPRI32:$src),
- !strconcat(opc.Text, " $dst, $src"),
- [(set GPRF32:$dst, (node GPRI32:$src))]>;
-}
-
let Predicates = [IsEGOrLaterDevice] in {
defm UNPACK_PAT0 : UnpackPatFrag<IL_OP_UNPACK_0, unpack0>;
defm UNPACK_PAT0_1 : UnpackPatFrag<IL_OP_UNPACK_0, unpack0_1>;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILInstructions.td Wed Sep 12 12:43:34 2012
@@ -11,190 +11,121 @@
//
//===----------------------------------------------------------------------===//
-let isReMaterializable = 0, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 0, isAsCheapAsAMove = 1, loadconst = 1 in {
defm LOADCONST : ILConstant<"mov $dst, $val">;
- defm MOVE : UnaryOpMC<IL_OP_MOV, IL_mov>;
- defm PHIMOVE : UnaryOpMC<IL_OP_MOV, IL_phimov>;
-}
-defm BINARY_NOT : UnaryOpMC<IL_OP_I_NOT, IL_not>;
-defm BINARY_OR : BinaryOpMC<IL_OP_I_OR, IL_or>;
-defm BINARY_AND : BinaryOpMC<IL_OP_AND, IL_and>;
-defm BINARY_XOR : BinaryOpMC<IL_OP_I_XOR, IL_xor>;
+ defm LOADFI : ILFrameIndex<"mov $dst, $val">;
+ }
+defm OR : BinaryOpMCInt<IL_OP_I_OR, or>;
defm AND : BinaryOpMCInt<IL_OP_AND, and>;
-defm CMOV : BinaryOpMC<IL_OP_CMOV, IL_cmov>;
-defm DIV_INF : BinaryOpMC<IL_OP_DIV_INF, IL_div_inf>;
-defm SMAX : BinaryOpMCInt<IL_OP_I_MAX, IL_smax>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder for 64bit
-// instructions
-defm CMOVLOG : TernaryOpMC<IL_OP_CMOV_LOGICAL, IL_cmov_logical>;
-defm SELECTBIN : TernaryOpMCScalar<IL_OP_CMOV_LOGICAL, select>;
-//===---------------------------------------------------------------------===//
-// Signed 8bit integer math instructions start here
-//===---------------------------------------------------------------------===//
-def INTTOANY_i8 : OneInOneOut<IL_OP_MOV, (outs GPRI8:$dst), (ins GPRI32:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
- [(set GPRI8:$dst, (IL_inttoany GPRI32:$src0))]>;
-//===---------------------------------------------------------------------===//
-// Signed 16bit integer math instructions start here
-//===---------------------------------------------------------------------===//
-def INTTOANY_i16: OneInOneOut<IL_OP_MOV, (outs GPRI16:$dst), (ins GPRI32:$src0),
- !strconcat(IL_OP_MOV.Text," $dst, $src0"),
- [(set GPRI16:$dst, (IL_inttoany GPRI32:$src0))]>;
-//===---------------------------------------------------------------------===//
-// Signed 32bit integer math instructions start here
-//===---------------------------------------------------------------------===//
-defm NEGATE : UnaryOpMCi32<IL_OP_I_NEGATE, IL_inegate>;
+defm SELECT : SelectMC<IL_OP_CMOV_LOGICAL>;
defm SMUL : BinaryOpMCi32<IL_OP_I_MUL, mul>;
defm SMULHI : BinaryOpMCi32<IL_OP_I_MUL_HIGH, mulhs>;
-defm SHL : BinaryOpMCi32Const<IL_OP_I_SHL, shl>;
-defm SHR : BinaryOpMCi32Const<IL_OP_I_SHR, sra>;
-let Predicates = [Has64BitPtr] in {
-defm SHL : BinaryOpMCi64Const<IL_OP_I_SHL, shl>;
-defm SHR : BinaryOpMCi64Const<IL_OP_I_SHR, sra>;
-defm USHR : BinaryOpMCi64Const<IL_OP_U_SHR, srl>;
-}
-defm SHLVEC : BinaryOpMCi32<IL_OP_I_SHL, shl>;
-defm SHRVEC : BinaryOpMCi32<IL_OP_I_SHR, sra>;
-defm ADD : BinaryOpMCi32<IL_OP_I_ADD, add>;
-defm CUSTOM_XOR : BinaryOpMCInt<IL_OP_I_XOR, xor>;
-// get rid of the addri via the tablegen instead of custom lowered instruction
+defm XOR : BinaryOpMCInt<IL_OP_I_XOR, xor>;
defm CUSTOM_ADD : BinaryOpMCi32<IL_OP_I_ADD, IL_add>;
defm EADD : BinaryOpMCi32<IL_OP_I_ADD, adde>;
-def INTTOANY_i32: OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst), (ins GPRI32:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
- [(set GPRI32:$dst, (IL_inttoany GPRI32:$src0))]>;
// Integer offsets for addressing
-def ADDir : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+def ADDpr : ILFormat<IL_OP_I_ADD, (outs GPRI32:$dst),
(ins MEM3232:$ptr, GPRI32:$offset),
!strconcat(IL_OP_I_ADD.Text, " $dst, $ptr, $offset"),
[(set GPRI32:$dst,
(IL_addaddrri ADDR:$ptr,
(i32 GPRI32:$offset)))]>;
-def ADDri : TwoInOneOut<IL_OP_I_ADD, (outs GPRI32:$dst),
+def ADDrp : ILFormat<IL_OP_I_ADD, (outs GPRI32:$dst),
(ins GPRI32:$offset, MEM3232:$ptr),
!strconcat(IL_OP_I_ADD.Text, " $dst, $offset, $ptr"),
[(set GPRI32:$dst,
(IL_addaddrir
(i32 GPRI32:$offset), ADDR:$ptr))]>;
+def ADDi64pr : ILFormat<IL_OP_I64_ADD, (outs GPRI64:$dst),
+ (ins MEM6464:$ptr, GPRI64:$offset),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $ptr, $offset"),
+ [(set GPRI64:$dst,
+ (IL_addaddrri ADDR64:$ptr,
+ (i64 GPRI64:$offset)))]>;
+def ADDi64rp : ILFormat<IL_OP_I64_ADD, (outs GPRI64:$dst),
+ (ins GPRI64:$offset, MEM6464:$ptr),
+ !strconcat(IL_OP_I_ADD.Text, " $dst, $offset, $ptr"),
+ [(set GPRI64:$dst,
+ (IL_addaddrir
+ (i64 GPRI64:$offset), ADDR64:$ptr))]>;
-defm IFFB_HI : UnaryOpMCi32<IL_OP_I_FFB_HI, IL_ffb_hi>;
-defm IFFB_LO : UnaryOpMCi32<IL_OP_I_FFB_LO, IL_ffb_lo>;
let mayLoad = 0, mayStore = 0 in {
-defm ABS : UnaryIntrinsicInt<IL_OP_ABS, int_AMDIL_abs>;
-defm BITCOUNT : UnaryIntrinsicInt<IL_OP_IBIT_COUNT, int_AMDIL_bit_count_i32>;
-defm FFB_LO : UnaryIntrinsicInt<IL_OP_I_FFB_LO, int_AMDIL_bit_find_first_lo>;
-defm FFB_HI : UnaryIntrinsicInt<IL_OP_I_FFB_HI, int_AMDIL_bit_find_first_hi>;
-defm FFB_SGN : UnaryIntrinsicInt<IL_OP_I_FFB_SGN,
+defm ABS : UnaryIntMCInt<IL_OP_ABS, int_AMDIL_abs>;
+defm BITCOUNT : UnaryIntMCInt<IL_OP_IBIT_COUNT, int_AMDIL_bit_count_i32>;
+defm FFB_LO : UnaryIntMCInt<IL_OP_I_FFB_LO, int_AMDIL_bit_find_first_lo>;
+defm FFB_HI : UnaryIntMCInt<IL_OP_I_FFB_HI, int_AMDIL_bit_find_first_hi>;
+defm FFB_SGN : UnaryIntMCInt<IL_OP_I_FFB_SGN,
int_AMDIL_bit_find_first_sgn>;
-defm IMULHI : BinaryIntrinsicInt<IL_OP_I_MUL_HIGH, int_AMDIL_mulhi_i32>;
+defm IMULHI : BinaryIntMCi32<IL_OP_I_MUL_HIGH, int_AMDIL_mulhi_i32>;
let Predicates = [HasHWSign24Bit] in {
-defm IMUL24 : BinaryIntrinsicInt<IL_OP_I_MUL24, int_AMDIL_mul24_i32>;
-defm IMULHI24 : BinaryIntrinsicInt<IL_OP_I_MULHI24, int_AMDIL_mulhi24_i32>;
-defm IMAD24 : TernaryIntrinsicInt<IL_OP_I_MAD24, int_AMDIL_mad24_i32>;
-}
-defm CARRY : BinaryIntrinsicInt<IL_OP_I_CARRY, int_AMDIL_carry_i32>;
-defm BORROW : BinaryIntrinsicInt<IL_OP_I_BORROW, int_AMDIL_borrow_i32>;
-defm IMIN : BinaryIntrinsicInt<IL_OP_I_MIN, int_AMDIL_min_i32>;
-defm IMAX : BinaryIntrinsicInt<IL_OP_I_MAX, int_AMDIL_max_i32>;
-defm CMOV_LOG : TernaryIntrinsicInt<IL_OP_CMOV_LOGICAL,
- int_AMDIL_cmov_logical>;
-defm IBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_IBIT_EXTRACT,
+defm IMUL24 : BinaryIntMCi32<IL_OP_I_MUL24, int_AMDIL_mul24_i32>;
+defm IMULHI24 : BinaryIntMCi32<IL_OP_I_MULHI24, int_AMDIL_mulhi24_i32>;
+defm IMAD24 : TernaryIntMCInt<IL_OP_I_MAD24, int_AMDIL_mad24_i32>;
+}
+defm CARRY : BinaryIntMCi32<IL_OP_I_CARRY, int_AMDIL_carry_i32>;
+defm BORROW : BinaryIntMCi32<IL_OP_I_BORROW, int_AMDIL_borrow_i32>;
+defm IMIN : BinaryIntMCi32<IL_OP_I_MIN, int_AMDIL_min_i32>;
+defm IMAX : BinaryIntMCi32<IL_OP_I_MAX, int_AMDIL_max_i32>;
+defm CMOV_LOG : TernaryIntMCInt<IL_OP_CMOV_LOGICAL, int_AMDIL_cmov_logical>;
+defm IBIT_EXTRACT : TernaryIntMCInt<IL_OP_IBIT_EXTRACT,
int_AMDIL_bit_extract_i32>;
-defm IMAD : TernaryIntrinsicInt<IL_OP_I_MAD, int_AMDIL_mad_i32>;
-defm SAD : TernaryIntrinsicInt<IL_OP_SAD, int_AMDIL_media_sad>;
-defm SADHI : TernaryIntrinsicInt<IL_OP_SAD_HI,
- int_AMDIL_media_sad_hi>;
-}
-def SAD4_i32 : ThreeInOneOut<IL_OP_SAD4, (outs GPRI32:$dst),
- (ins GPRV4I32:$src, GPRV4I32:$src1, GPRI32:$src2),
- !strconcat(IL_OP_SAD4.Text, " $dst, $src, $src1, $src2"),
- [(set GPRI32:$dst,
- (int_AMDIL_media_sad4 GPRV4I32:$src, GPRV4I32:$src1,
- GPRI32:$src2))]>;
-def FTOV4U8_i32 : OneInOneOut<IL_OP_F2U4, (outs GPRI32:$dst),
- (ins GPRV4F32:$src),
- !strconcat(IL_OP_F2U4.Text, " $dst, $src"),
- [(set GPRI32:$dst,
- (int_AMDIL_media_convert_f2v4u8 GPRV4F32:$src))]>;
+defm IMAD : TernaryIntMCInt<IL_OP_I_MAD, int_AMDIL_mad_i32>;
+defm SAD : TernaryIntMCInt<IL_OP_SAD, int_AMDIL_media_sad>;
+defm SADHI : TernaryIntMCInt<IL_OP_SAD_HI, int_AMDIL_media_sad_hi>;
+}
+defm SAD4 : TernaryIntMCFull<IL_OP_SAD4, int_AMDIL_media_sad4,
+ i32, GPRI32, v4i32, GPRV4I32, v4i32imm, imm,
+ v4i32, GPRV4I32, v4i32imm, imm, i32, GPRI32, i32imm, imm>;
+defm FTOV4U8 : UnaryIntMCFull<IL_OP_F2U4, int_AMDIL_media_convert_f2v4u8,
+ i32, GPRI32, v4f32, GPRV4F32, v4f32imm, fpimm>;
//===---------------------------------------------------------------------===//
// Unsigned 32bit integer math instructions start here
//===---------------------------------------------------------------------===//
defm UMUL : BinaryOpMCi32<IL_OP_U_MUL, IL_umul>;
defm UMULHI : BinaryOpMCi32<IL_OP_U_MUL_HIGH, mulhu>;
-defm USHR : BinaryOpMCi32Const<IL_OP_U_SHR, srl>;
-defm USHRVEC : BinaryOpMCi32<IL_OP_U_SHR, srl>;
defm UDIV : BinaryOpMCi32<IL_OP_U_DIV, udiv>;
-defm NATIVE_UDIV : BinaryIntrinsicInt<IL_OP_U_DIV, int_AMDIL_udiv>;
+defm NATIVE_UDIV : BinaryIntMCi32<IL_OP_U_DIV, int_AMDIL_udiv>;
let mayLoad=0, mayStore=0 in {
-defm UBIT_REVERSE : UnaryIntrinsicInt<IL_OP_UBIT_REVERSE,
+defm UBIT_REVERSE : UnaryIntMCInt<IL_OP_UBIT_REVERSE,
int_AMDIL_bit_reverse_u32>;
-defm UMULHI_INT : BinaryIntrinsicInt<IL_OP_U_MUL_HIGH, int_AMDIL_mulhi_u32>;
-defm UMULHI24 : BinaryIntrinsicInt<IL_OP_U_MULHI24, int_AMDIL_mulhi24_u32>;
-defm UMUL24 : BinaryIntrinsicInt<IL_OP_U_MUL24, int_AMDIL_mul24_u32>;
-defm UMIN : BinaryIntrinsicInt<IL_OP_U_MIN, int_AMDIL_min_u32>;
-defm UMAX : BinaryIntrinsicInt<IL_OP_U_MAX, int_AMDIL_max_u32>;
-defm UBIT_EXTRACT : TernaryIntrinsicInt<IL_OP_UBIT_EXTRACT,
+defm UMULHI_INT : BinaryIntMCi32<IL_OP_U_MUL_HIGH, int_AMDIL_mulhi_u32>;
+defm UMULHI24 : BinaryIntMCi32<IL_OP_U_MULHI24, int_AMDIL_mulhi24_u32>;
+defm UMUL24 : BinaryIntMCi32<IL_OP_U_MUL24, int_AMDIL_mul24_u32>;
+defm UMIN : BinaryIntMCi32<IL_OP_U_MIN, int_AMDIL_min_u32>;
+defm UMAX : BinaryIntMCi32<IL_OP_U_MAX, int_AMDIL_max_u32>;
+defm UBIT_EXTRACT : TernaryIntMCInt<IL_OP_UBIT_EXTRACT,
int_AMDIL_bit_extract_u32>;
-defm UBIT_INSERT : QuaternaryIntrinsicInt<IL_OP_UBIT_INSERT,
+let swizzle = 6 in {
+defm UBIT_INSERT : QuaternaryIntMCInt<IL_OP_UBIT_INSERT,
int_AMDIL_bit_insert_u32>;
-defm BFI : TernaryIntrinsicInt<IL_OP_BFI, int_AMDIL_bfi>;
-defm BFM : BinaryIntrinsicInt<IL_OP_BFM, int_AMDIL_bfm>;
-defm UMAD : TernaryIntrinsicInt<IL_OP_U_MAD, int_AMDIL_mad_u32>;
-defm UMAD24 : TernaryIntrinsicInt<IL_OP_U_MAD24, int_AMDIL_mad24_u32>;
-defm U4LERP : TernaryIntrinsicInt<IL_OP_U4_LERP,
+}
+defm BFI : TernaryIntMCInt<IL_OP_BFI, int_AMDIL_bfi>;
+defm BFM : BinaryIntMCi32<IL_OP_BFM, int_AMDIL_bfm>;
+defm UMAD : TernaryIntMCInt<IL_OP_U_MAD, int_AMDIL_mad_u32>;
+defm UMAD24 : TernaryIntMCInt<IL_OP_U_MAD24, int_AMDIL_mad24_u32>;
+defm U4LERP : TernaryIntMCInt<IL_OP_U4_LERP,
int_AMDIL_media_lerp_u4>;
-defm BITALIGN : TernaryIntrinsicInt<IL_OP_BIT_ALIGN, int_AMDIL_media_bitalign>;
-defm BYTEALIGN : TernaryIntrinsicInt<IL_OP_BYTE_ALIGN, int_AMDIL_media_bytealign>;
+defm BITALIGN : TernaryIntMCInt<IL_OP_BIT_ALIGN, int_AMDIL_media_bitalign>;
+defm BYTEALIGN : TernaryIntMCInt<IL_OP_BYTE_ALIGN, int_AMDIL_media_bytealign>;
}
-//===---------------------------------------------------------------------===//
-// Signed 64bit integer math instructions start here
-//===---------------------------------------------------------------------===//
-def LNEGATE : OneInOneOut<IL_OP_I64_NEGATE, (outs GPRI64:$dst), (ins GPRI64:$src),
- !strconcat(IL_OP_I64_NEGATE.Text, " $dst, $src"),
- [(set GPRI64:$dst, (IL_inegate GPRI64:$src))]>;
-def LNEGATE_v2i64: OneInOneOut<IL_OP_I64_NEGATE, (outs GPRV2I64:$dst),
- (ins GPRV2I64:$src),
- !strconcat(IL_OP_I64_NEGATE.Text, " $dst, $src"),
- [(set GPRV2I64:$dst, (IL_inegate GPRV2I64:$src))]>;
let Predicates = [HasHW64Bit] in {
-def LADD_i64 : TwoInOneOut<IL_OP_I64_ADD, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI64:$src2),
- !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (IL_add GPRI64:$src1, GPRI64:$src2))]>;
-def LADD_v2i64 : TwoInOneOut<IL_OP_I64_ADD, (outs GPRV2I64:$dst),
- (ins GPRV2I64:$src1, GPRV2I64:$src2),
- !strconcat(IL_OP_I64_ADD.Text, " $dst, $src1, $src2"),
- [(set GPRV2I64:$dst, (IL_add GPRV2I64:$src1, GPRV2I64:$src2))]>;
-defm IMIN64 : BinaryIntrinsicLong<IL_OP_I64_MIN, int_AMDIL_min_i32>;
-defm UMIN64 : BinaryIntrinsicLong<IL_OP_U64_MIN, int_AMDIL_min_u32>;
-defm IMAX64 : BinaryIntrinsicLong<IL_OP_I64_MAX, int_AMDIL_max_i32>;
-defm UMAX64 : BinaryIntrinsicLong<IL_OP_U64_MAX, int_AMDIL_max_u32>;
-}
+defm CUSTOM_ADD : BinaryOpMCi64<IL_OP_I64_ADD, IL_add>;
+defm ADD : BinaryOpMCi64<IL_OP_I64_ADD, add>;
+defm IMIN64 : BinaryIntMCi64<IL_OP_I64_MIN, int_AMDIL_min_i32>;
+defm UMIN64 : BinaryIntMCi64<IL_OP_U64_MIN, int_AMDIL_min_u32>;
+defm IMAX64 : BinaryIntMCi64<IL_OP_I64_MAX, int_AMDIL_max_i32>;
+defm UMAX64 : BinaryIntMCi64<IL_OP_U64_MAX, int_AMDIL_max_u32>;
+}
+defm SHL : BinaryNCOpMCi32Shift<IL_OP_I_SHL, shl>;
+defm SHR : BinaryNCOpMCi32Shift<IL_OP_I_SHR, sra>;
+defm USHR : BinaryNCOpMCi32Shift<IL_OP_U_SHR, srl>;
let Predicates = [HasHW64Bit] in {
-def LSHR : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI32:$src2),
- !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI32:$src2))]>;
-def LSHL : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI32:$src2),
- !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI32:$src2))]>;
-// Apple requires a pattern since they pass down the shift operand as
-// a 64bit value, although the lower 6 bits are all that are used.
-def LSHR_APPLE : TwoInOneOut<IL_OP_I64_SHR, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI64:$src2),
- !strconcat(IL_OP_I64_SHR.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (sra GPRI64:$src1, GPRI64:$src2))]>;
-def LSHL_APPLE : TwoInOneOut<IL_OP_I64_SHL, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI64:$src2),
- !strconcat(IL_OP_I64_SHL.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (shl GPRI64:$src1, GPRI64:$src2))]>;
+defm SHL : BinaryNCOpMCi64Shift<IL_OP_I64_SHL, shl>;
+defm SHR : BinaryNCOpMCi64Shift<IL_OP_I64_SHR, sra>;
+defm USHR : BinaryNCOpMCi64Shift<IL_OP_U64_SHR, srl>;
}
-//===---------------------------------------------------------------------===//
-// Unsigned 64bit integer math instructions start here
-//===---------------------------------------------------------------------===//
let Predicates = [HasTmrRegister] in {
def Tmr : ILFormat<IL_OP_MOV, (outs GPRXYI64:$tmr),
(ins), !strconcat(IL_OP_MOV.Text, " $tmr, Tmr.xyxy"),
@@ -208,136 +139,93 @@
!strconcat(IL_OP_WAVE_ID.Text, " $id"),
[(set GPRI32:$id, (int_AMDIL_wavefront_id))]>;
}
-let Predicates = [HasHW64Bit] in {
-def LUSHR : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI32:$src2),
- !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI32:$src2))]>;
-// Apple requires a pattern since they pass down the shift operand as
-// a 64bit value, although the lower 6 bits are all that are used.
-def LUSHR_APPLE : TwoInOneOut<IL_OP_U64_SHR, (outs GPRI64:$dst),
- (ins GPRI64:$src1, GPRI64:$src2),
- !strconcat(IL_OP_U64_SHR.Text, " $dst, $src1, $src2"),
- [(set GPRI64:$dst, (srl GPRI64:$src1, GPRI64:$src2))]>;
-}
+let ieee = 1 in {
+ defm MUL_IEEE : BinaryOpMCf32<IL_OP_MUL_IEEE, fmul>,
+ BinaryOpMCf64<IL_OP_D_MUL, fmul>;
+}
+defm ADD : BinaryOpMCf32<IL_OP_ADD, fadd>,
+BinaryOpMCi32<IL_OP_I_ADD, add>,
+BinaryOpMCf64<IL_OP_D_ADD, fadd>;
//===---------------------------------------------------------------------===//
-// Generic Float Instructions
-//===---------------------------------------------------------------------===//
-let hasIEEEFlag = 1 in {
-defm MUL_IEEE : BinaryOpMCFloat<IL_OP_MUL_IEEE, IL_OP_D_MUL, fmul>;
-}
-defm ADD : BinaryOpMCFloat<IL_OP_ADD, IL_OP_D_ADD, fadd>;
-//===---------------------------------------------------------------------===//
// float math instructions start here
//===---------------------------------------------------------------------===//
let mayLoad=0, mayStore=0 in {
-defm ABS : UnaryIntrinsicFloat<IL_OP_ABS, int_AMDIL_fabs>;
-defm FRAC : UnaryIntrinsicFloat<IL_OP_FRC, int_AMDIL_fraction>;
-defm PIREDUCE : UnaryIntrinsicFloat<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
-defm ROUND_NEAREST : UnaryIntrinsicFloat<IL_OP_ROUND_NEAR,
+defm ABS : UnaryIntMCf32<IL_OP_ABS, int_AMDIL_fabs>;
+defm FRAC : UnaryIntMCf32<IL_OP_FRC, int_AMDIL_fraction>;
+defm PIREDUCE : UnaryIntMCf32<IL_OP_PI_REDUCE, int_AMDIL_pireduce>;
+defm ROUND_NEAREST : UnaryIntMCf32<IL_OP_ROUND_NEAR,
int_AMDIL_round_nearest>;
-defm ROUND_NEGINF : UnaryIntrinsicFloat<IL_OP_ROUND_NEG_INF,
+defm ROUND_NEGINF : UnaryIntMCf32<IL_OP_ROUND_NEG_INF,
int_AMDIL_round_neginf>;
-defm ROUND_POSINF : UnaryIntrinsicFloat<IL_OP_ROUND_POS_INF,
+defm ROUND_POSINF : UnaryIntMCf32<IL_OP_ROUND_POS_INF,
int_AMDIL_round_posinf>;
-defm ROUND_ZERO : UnaryIntrinsicFloat<IL_OP_ROUND_ZERO,
+defm ROUND_ZERO : UnaryIntMCf32<IL_OP_ROUND_ZERO,
int_AMDIL_round_zero>;
-defm ACOS : UnaryIntrinsicFloatScalar<IL_OP_ACOS, int_AMDIL_acos>;
-defm ATAN : UnaryIntrinsicFloatScalar<IL_OP_ATAN, int_AMDIL_atan>;
-defm ASIN : UnaryIntrinsicFloatScalar<IL_OP_ASIN, int_AMDIL_asin>;
-defm TAN : UnaryIntrinsicFloatScalar<IL_OP_TAN, int_AMDIL_tan>;
-defm SIN : UnaryIntrinsicFloatScalar<IL_OP_SIN, int_AMDIL_sin>;
-defm COS : UnaryIntrinsicFloatScalar<IL_OP_COS, int_AMDIL_cos>;
-defm SQRT : UnaryIntrinsicFloatScalar<IL_OP_SQRT, int_AMDIL_sqrt>;
-defm EXP : UnaryIntrinsicFloatScalar<IL_OP_EXP, int_AMDIL_exp>;
-defm EXPVEC : UnaryIntrinsicFloat<IL_OP_EXP_VEC, int_AMDIL_exp_vec>;
-defm SQRTVEC : UnaryIntrinsicFloat<IL_OP_SQRT_VEC, int_AMDIL_sqrt_vec>;
-defm COSVEC : UnaryIntrinsicFloat<IL_OP_COS_VEC, int_AMDIL_cos_vec>;
-defm SINVEC : UnaryIntrinsicFloat<IL_OP_SIN_VEC, int_AMDIL_sin_vec>;
-defm LOGVEC : UnaryIntrinsicFloat<IL_OP_LOG_VEC, int_AMDIL_log_vec>;
-defm RSQVEC : UnaryIntrinsicFloat<IL_OP_RSQ_VEC, int_AMDIL_rsq_vec>;
-defm EXN : UnaryIntrinsicFloatScalar<IL_OP_EXN, int_AMDIL_exn>;
-defm SIGN : UnaryIntrinsicFloat<IL_OP_SGN, int_AMDIL_sign>;
-defm LENGTH : UnaryIntrinsicFloat<IL_OP_LEN, int_AMDIL_length>;
-defm POW : BinaryIntrinsicFloat<IL_OP_POW, int_AMDIL_pow>;
+defm ACOS : UnaryIntMCf32Scalar<IL_OP_ACOS, int_AMDIL_acos>;
+defm ATAN : UnaryIntMCf32Scalar<IL_OP_ATAN, int_AMDIL_atan>;
+defm ASIN : UnaryIntMCf32Scalar<IL_OP_ASIN, int_AMDIL_asin>;
+defm TAN : UnaryIntMCf32Scalar<IL_OP_TAN, int_AMDIL_tan>;
+defm SIN : UnaryIntMCf32Scalar<IL_OP_SIN, int_AMDIL_sin>;
+defm COS : UnaryIntMCf32Scalar<IL_OP_COS, int_AMDIL_cos>;
+defm SQRT : UnaryIntMCf32Scalar<IL_OP_SQRT, int_AMDIL_sqrt>;
+defm EXP : UnaryIntMCf32Scalar<IL_OP_EXP, int_AMDIL_exp>;
+defm EXPVEC : UnaryIntMCf32<IL_OP_EXP_VEC, int_AMDIL_exp_vec>;
+defm SQRTVEC : UnaryIntMCf32<IL_OP_SQRT_VEC, int_AMDIL_sqrt_vec>;
+defm COSVEC : UnaryIntMCf32<IL_OP_COS_VEC, int_AMDIL_cos_vec>;
+defm SINVEC : UnaryIntMCf32<IL_OP_SIN_VEC, int_AMDIL_sin_vec>;
+defm LOGVEC : UnaryIntMCf32<IL_OP_LOG_VEC, int_AMDIL_log_vec>;
+defm RSQVEC : UnaryIntMCf32<IL_OP_RSQ_VEC, int_AMDIL_rsq_vec>;
+defm EXN : UnaryIntMCf32Scalar<IL_OP_EXN, int_AMDIL_exn>;
+defm SIGN : UnaryIntMCf32<IL_OP_SGN, int_AMDIL_sign>;
+defm LENGTH : UnaryIntMCf32<IL_OP_LEN, int_AMDIL_length>;
+defm POW : BinaryIntMCf32<IL_OP_POW, int_AMDIL_pow>;
}
-let hasIEEEFlag = 1 in {
+let ieee = 1 in {
let mayLoad = 0, mayStore=0 in {
-defm MIN : BinaryIntrinsicFloat<IL_OP_MIN, int_AMDIL_min>;
-defm MAX : BinaryIntrinsicFloat<IL_OP_MAX, int_AMDIL_max>;
-defm MAD : TernaryIntrinsicFloat<IL_OP_MAD, int_AMDIL_mad>;
+defm MIN : BinaryIntMCf32<IL_OP_MIN, int_AMDIL_min>;
+defm MAX : BinaryIntMCf32<IL_OP_MAX, int_AMDIL_max>;
+defm MAD : TernaryIntMCf32<IL_OP_MAD, int_AMDIL_mad>;
}
defm MOD : BinaryOpMCf32<IL_OP_MOD, frem>;
}
-let hasZeroOpFlag = 1 in {
+let zeroop = 1 in {
let mayLoad = 0, mayStore=0 in {
-defm LN : UnaryIntrinsicFloatScalar<IL_OP_LN, int_AMDIL_ln>;
-defm LOG : UnaryIntrinsicFloatScalar<IL_OP_LOG, int_AMDIL_log>;
-defm RSQ : UnaryIntrinsicFloatScalar<IL_OP_RSQ, int_AMDIL_rsq>;
-defm DIV_INT : BinaryIntrinsicFloat<IL_OP_DIV, int_AMDIL_div>;
+defm LN : UnaryIntMCf32Scalar<IL_OP_LN, int_AMDIL_ln>;
+defm LOG : UnaryIntMCf32Scalar<IL_OP_LOG, int_AMDIL_log>;
+defm RSQ : UnaryIntMCf32Scalar<IL_OP_RSQ, int_AMDIL_rsq>;
+defm DIV_INT : BinaryIntMCf32<IL_OP_DIV, int_AMDIL_div>;
defm DIV : BinaryOpMCf32<IL_OP_DIV, fdiv>;
-defm DIV_PRECISE : BinaryIntrinsicFloat<IL_OP_DIV_PRECISE, int_AMDIL_div_precise>;
+defm DIV_PRECISE : BinaryIntMCf32<IL_OP_DIV_PRECISE, int_AMDIL_div_precise>;
}
}
let mayLoad = 0, mayStore=0 in {
-defm CLAMP : TernaryIntrinsicFloat<IL_OP_CLAMP, int_AMDIL_clamp>;
-defm FMA : TernaryIntrinsicFloat<IL_OP_FMA, int_AMDIL_fma>;
-defm LERP : TernaryIntrinsicFloat<IL_OP_LERP, int_AMDIL_lerp>;
+defm CLAMP : TernaryIntMCf32<IL_OP_CLAMP, int_AMDIL_clamp>;
+defm FMA : TernaryIntMCf32<IL_OP_FMA, int_AMDIL_fma>;
+defm LERP : TernaryIntMCf32<IL_OP_LERP, int_AMDIL_lerp>;
}
defm SUB : BinaryOpMCf32<IL_OP_SUB, fsub>;
defm FABS : UnaryOpMCf32<IL_OP_ABS, fabs>;
-defm FMAD : TernaryOpMCf32<IL_OP_MAD, IL_mad>;
defm NEARBY : UnaryOpMCf32<IL_OP_ROUND_NEAR, fnearbyint>;
defm TRUNC : UnaryOpMCf32<IL_OP_ROUND_ZERO, ftrunc>;
defm CEIL : UnaryOpMCf32<IL_OP_ROUND_POS_INF, fceil>;
defm FLOOR : UnaryOpMCf32<IL_OP_ROUND_NEG_INF, ffloor>;
-def NEG_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
- (ins GPRF32:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
- [(set GPRF32:$dst, (fneg GPRF32:$src0))]>;
-def INTTOANY_f32 : OneInOneOut<IL_OP_MOV, (outs GPRF32:$dst),
- (ins GPRI32:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, $src0"),
- [(set GPRF32:$dst, (IL_inttoany GPRI32:$src0))]>;
-let hasIEEEFlag = 1 in {
-def DP2ADD_f32 : ThreeInOneOut<IL_OP_DP2_ADD, (outs GPRF32:$dst),
- (ins GPRV2F32:$src0, GPRV2F32:$src1, GPRF32:$src2),
- !strconcat(IL_OP_DP2_ADD.Text, " $dst, $src0, $src1, $src2"),
- [(set GPRF32:$dst,
- (int_AMDIL_dp2_add GPRV2F32:$src0,
- GPRV2F32:$src1, GPRF32:$src2))]>;
-def DP2_f32 : TwoInOneOut<IL_OP_DP2, (outs GPRF32:$dst),
- (ins GPRV2F32:$src0, GPRV2F32:$src1),
- !strconcat(IL_OP_DP2.Text, " $dst, $src0, $src1"),
- [(set GPRF32:$dst,
- (int_AMDIL_dp2 GPRV2F32:$src0, GPRV2F32:$src1))]>;
-def DP3_f32 : TwoInOneOut<IL_OP_DP3, (outs GPRF32:$dst),
- (ins GPRV4F32:$src0, GPRV4F32:$src1),
- !strconcat(IL_OP_DP3.Text, " $dst, $src0, $src1"),
- [(set GPRF32:$dst,
- (int_AMDIL_dp3 GPRV4F32:$src0, GPRV4F32:$src1))]>;
-def DP4_f32 : TwoInOneOut<IL_OP_DP4, (outs GPRF32:$dst),
- (ins GPRV4F32:$src0, GPRV4F32:$src1),
- !strconcat(IL_OP_DP4.Text, " $dst, $src0, $src1"),
- [(set GPRF32:$dst,
- (int_AMDIL_dp4 GPRV4F32:$src0, GPRV4F32:$src1))]>;
-def FTZ_f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRF32:$dst),
- (ins GPRF32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
- [(set GPRF32:$dst,
- (int_AMDIL_ftz GPRF32:$src))]>;
-def FTZ_v2f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRV2F32:$dst),
- (ins GPRV2F32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
- [(set GPRV2F32:$dst,
- (int_AMDIL_ftz GPRV2F32:$src))]>;
-def FTZ_v4f32 : OneInOneOut<IL_OP_MUL_IEEE, (outs GPRV4F32:$dst),
- (ins GPRV4F32:$src), !strconcat(IL_OP_MUL_IEEE.Text, " $dst, $src, r0.1"),
- [(set GPRV4F32:$dst,
- (int_AMDIL_ftz GPRV4F32:$src))]>;
+let ieee = 1 in {
+ defm DP2ADD : TernaryIntMCFull<IL_OP_DP2_ADD, int_AMDIL_dp2_add, f32, GPRF32,
+ v2f32, GPRV2F32, v2f32imm, fpimm, v2f32, GPRV2F32, v2f32imm, fpimm,
+ f32, GPRF32, f32imm, fpimm>;
+ defm DP2 : BinaryIntMCFull<IL_OP_DP2, int_AMDIL_dp2, f32, GPRF32,
+ v2f32, GPRV2F32, v2f32imm, fpimm, v2f32, GPRV2F32, v2f32imm, fpimm>;
+ defm DP3 : BinaryIntMCFull<IL_OP_DP3, int_AMDIL_dp3, f32, GPRF32,
+ v4f32, GPRV4F32, v4f32imm, fpimm, v4f32, GPRV4F32, v4f32imm, fpimm>;
+ defm DP4 : BinaryIntMCFull<IL_OP_DP4, int_AMDIL_dp4, f32, GPRF32,
+ v4f32, GPRV4F32, v4f32imm, fpimm, v4f32, GPRV4F32, v4f32imm, fpimm>;
+ defm FTZ : FTZMC<IL_OP_MUL_IEEE, int_AMDIL_ftz>;
}
+
defm UNPACK_B0 : IntrConvertI32TOF32<IL_OP_UNPACK_0, int_AMDIL_media_unpack_byte_0>;
defm UNPACK_B1 : IntrConvertI32TOF32<IL_OP_UNPACK_1, int_AMDIL_media_unpack_byte_1>;
defm UNPACK_B2 : IntrConvertI32TOF32<IL_OP_UNPACK_2, int_AMDIL_media_unpack_byte_2>;
@@ -349,103 +237,41 @@
defm FTOH_NEAR : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEAR, int_AMDIL_convert_f32_f16_near>;
defm FTOH_NEG_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_NEG_INF, int_AMDIL_convert_f32_f16_neg_inf>;
defm FTOH_PLUS_INF : IntrConvertF32TOF16<IL_OP_F32_TO_F16_PLUS_INF, int_AMDIL_convert_f32_f16_plus_inf>;
-//===---------------------------------------------------------------------===//
-// float math instructions end here
-//===---------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------===//
-// float2 math instructions start here
-//===---------------------------------------------------------------------===//
-def NEG_v2f32 : OneInOneOut<IL_OP_MOV, (outs GPRV2F32:$dst),
- (ins GPRV2F32:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
- [(set GPRV2F32:$dst, (fneg GPRV2F32:$src0))]>;
-//===---------------------------------------------------------------------===//
-// float2 math instructions end here
-//===---------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------===//
-// float4 math instructions start here
-//===---------------------------------------------------------------------===//
-def NEG_v4f32 : OneInOneOut<IL_OP_MOV, (outs GPRV4F32:$dst),
- (ins GPRV4F32:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(xyzw)"),
- [(set GPRV4F32:$dst, (fneg GPRV4F32:$src0))]>;
-//===---------------------------------------------------------------------===//
-// float4 math instructions end here
-//===---------------------------------------------------------------------===//
+defm NEG : UnaryNegMCf32<IL_OP_MOV, fneg>;
+defm SUB : BinaryNegMCf64<IL_OP_D_ADD, fsub>;
+defm NEG : UnaryNegMCf64<IL_OP_MOV, fneg>;
+defm NOT : NotMCReg<IL_OP_I_NOT, not, vnot>;
+def vineg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+defm NEG : UnaryPatMCi8Scalar< IL_OP_I_NEGATE, ineg>;
+defm NEG : UnaryPatMCi8Vector< IL_OP_I_NEGATE, vineg>;
+defm NEG : UnaryPatMCi16Scalar<IL_OP_I_NEGATE, ineg>;
+defm NEG : UnaryPatMCi16Vector<IL_OP_I_NEGATE, vineg>;
+defm NEG : UnaryPatMCi32Scalar<IL_OP_I_NEGATE, ineg>;
+defm NEG : UnaryPatMCi32Vector<IL_OP_I_NEGATE, vineg>;
+defm NEG : UnaryPatMCi64Scalar<IL_OP_I64_NEGATE, ineg>;
+defm NEG : UnaryPatMCi64Vector<IL_OP_I64_NEGATE, vineg>;
-//===---------------------------------------------------------------------===//
-// double math instructions start here
-//===---------------------------------------------------------------------===//
-def SUB_f64 : TwoInOneOut<IL_OP_D_ADD, (outs GPRF64:$dst),
- (ins GPRF64:$src0, GPRF64:$src1),
- !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, ${src1}_neg(yw)"),
- [(set GPRF64:$dst, (fsub GPRF64:$src0, GPRF64:$src1))]>;
-def SUB_v2f64 : TwoInOneOut<IL_OP_D_ADD, (outs GPRV2F64:$dst),
- (ins GPRV2F64:$src0, GPRV2F64:$src1),
- !strconcat(IL_OP_D_ADD.Text, " $dst, $src0, ${src1}_neg(yw)"),
- [(set GPRV2F64:$dst, (fsub GPRV2F64:$src0, GPRV2F64:$src1))]>;
-def NEG_f64 : OneInOneOut<IL_OP_MOV, (outs GPRF64:$dst),
- (ins GPRF64:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(yw)"),
- [(set GPRF64:$dst, (fneg GPRF64:$src0))]>;
-def NEG_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2F64:$dst),
- (ins GPRV2F64:$src0),
- !strconcat(IL_OP_MOV.Text, " $dst, ${src0}_neg(yw)"),
- [(set GPRV2F64:$dst, (fneg GPRV2F64:$src0))]>;
let mayLoad = 0, mayStore=0 in {
-defm MIN : BinaryIntrinsicDouble<IL_OP_D_MIN, int_AMDIL_min>;
-defm MAX : BinaryIntrinsicDouble<IL_OP_D_MAX, int_AMDIL_max>;
-defm DIV : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
-defm MAD : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_mad>;
-defm DFMA : TernaryIntrinsicDouble<IL_OP_D_MAD, int_AMDIL_fma>;
-defm FRAC : UnaryIntrinsicDouble<IL_OP_D_FRC, int_AMDIL_fraction>;
-defm SQRT : UnaryIntrinsicDouble<IL_OP_D_SQRT, int_AMDIL_sqrt>;
-defm RSQ : UnaryIntrinsicDoubleScalar<IL_OP_D_RSQ, int_AMDIL_rsq>;
-defm RCP : UnaryIntrinsicDoubleScalar<IL_OP_D_RCP, int_AMDIL_drcp>;
-defm DMAD : TernaryOpMCf64<IL_OP_D_MAD, IL_mad>;
+ defm MIN : BinaryIntMCf64<IL_OP_D_MIN, int_AMDIL_min>;
+ defm MAX : BinaryIntMCf64<IL_OP_D_MAX, int_AMDIL_max>;
+ defm DIV : BinaryIntMCf64<IL_OP_D_DIV, int_AMDIL_div>;
+ defm MAD : TernaryIntMCf64<IL_OP_D_MAD, int_AMDIL_mad>;
+ defm DFMA : TernaryIntMCf64<IL_OP_D_MAD, int_AMDIL_fma>;
+ defm FRAC : UnaryIntMCf64<IL_OP_D_FRC, int_AMDIL_fraction>;
+ defm SQRT : UnaryIntMCf64<IL_OP_D_SQRT, int_AMDIL_sqrt>;
+ defm RSQ : UnaryIntMCf64Scalar<IL_OP_D_RSQ, int_AMDIL_rsq>;
+ defm RCP : UnaryIntMCf64Scalar<IL_OP_D_RCP, int_AMDIL_drcp>;
}
-let Predicates = [HasHWDoubleAbs] in {
+
defm DABS : UnaryOpMCf64<IL_OP_D_ABS, fabs>;
- let mayLoad = 0, mayStore=0 in {
-defm ABS : UnaryIntrinsicDouble<IL_OP_D_ABS, int_AMDIL_fabs>;
- }
-}
-let Predicates = [HasSWDoubleAbs] in {
-def SWDABS_f64 : OneInOneOut<IL_OP_D_ABS, (outs GPRF64:$dst),
- (ins GPRF64:$src),
- !strconcat(IL_OP_D_FREXP.Text," $dst, ${src}_abs(yw)"),
- [(set GPRF64:$dst, (fabs GPRF64:$src))]>;
- let mayLoad = 0, mayStore=0 in {
-def SWABS_f64 : OneInOneOut<IL_OP_D_ABS, (outs GPRF64:$dst),
- (ins GPRF64:$src),
- !strconcat(IL_OP_D_FREXP.Text," $dst, ${src}_abs(yw)"),
- [(set GPRF64:$dst, (int_AMDIL_fabs GPRF64:$src))]>;
- }
+let mayLoad = 0, mayStore=0 in {
+ defm ABS : UnaryIntMCf64<IL_OP_D_ABS, int_AMDIL_fabs>;
}
-def FREXP_f64 : OneInOneOut<IL_OP_D_FREXP, (outs GPRV2I64:$dst),
- (ins GPRF64:$src),
- !strconcat(IL_OP_D_FREXP.Text," $dst, $src"),
- [(set GPRV2I64:$dst,
- (int_AMDIL_frexp_f64 GPRF64:$src))]>;
-def LDEXP_f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRF64:$dst),
- (ins GPRF64:$src, GPRI32:$src1),
- !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
- [(set GPRF64:$dst,
- (int_AMDIL_ldexp GPRF64:$src, GPRI32:$src1))]>;
-def LDEXP_v2f64 : TwoInOneOut<IL_OP_D_LDEXP, (outs GPRV2F64:$dst),
- (ins GPRV2F64:$src, GPRV2I32:$src1),
- !strconcat(IL_OP_D_LDEXP.Text, " $dst, $src, $src1"),
- [(set GPRV2F64:$dst,
- (int_AMDIL_ldexp GPRV2F64:$src, GPRV2I32:$src1))]>;
-//===---------------------------------------------------------------------===//
-// double math instructions end here
-//===---------------------------------------------------------------------===//
-//===---------------------------------------------------------------------===//
-// Various Macros
-//===---------------------------------------------------------------------===//
-def MACRO__sdiv_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, sdiv>;
+
+defm FREXPf64 : UnaryIntMCFull<IL_OP_D_FREXP, int_AMDIL_frexp_f64,
+ v2i64, GPRV2I64, f64, GPRF64, f64imm, fpimm>;
+defm LDEXP : BinaryIntMCDDI<IL_OP_D_LDEXP, int_AMDIL_ldexp>;
+
def MACRO__sdiv_i16 : BinaryMacro<GPRI16, GPRI16, GPRI16, sdiv>;
def MACRO__sdiv_i32 : BinaryMacro<GPRI32, GPRI32, GPRI32, sdiv>;
def MACRO__udiv_i8 : BinaryMacro< GPRI8, GPRI8, GPRI8, udiv>;
@@ -487,377 +313,161 @@
defm SMUL : BinaryOpMCi64<IL_OP_I64_MUL, mul>;
defm UMUL : BinaryOpMCi64<IL_OP_U64_MUL, IL_umul>;
}
-// Apple requires a pattern since they pass down the shift operand as
-// a 64bit value, although the lower 6 bits are all that are used.
-// vector 2 use the software emulated mode since SC only supports
-// scalar 64bit ops.
-def MACRO__shr_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, srl>;
-def MACRO__shl_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, shl>;
-def MACRO__sra_apple_v2i64: BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I64, sra>;
-def MACRO__shr_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, srl>;
-def MACRO__shl_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, shl>;
-def MACRO__sra_v2i64 : BinaryMacro<GPRV2I64, GPRV2I64, GPRV2I32, sra>;
-
-let Predicates = [HasSW64Bit] in {
-def MACRO__shr_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, srl>;
-def MACRO__shl_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, shl>;
-def MACRO__sra_apple_i64: BinaryMacro<GPRI64, GPRI64, GPRI64, sra>;
-def MACRO__shr_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, srl>;
-def MACRO__shl_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, shl>;
-def MACRO__sra_i64 : BinaryMacro<GPRI64, GPRI64, GPRI32, sra>;
-}
//===---------------------------------------------------------------------===//
// Comparison Instructions
//===---------------------------------------------------------------------===//
-let usesCustomInserter = 1 in {
- defm CMP : Compare<"Pseudo comparison instr">;
-}
-//===---------------------------------------------------------------------===//
-// 32-bit floating point operations
-//===---------------------------------------------------------------------===//
-def FEQ : TwoInOneOut<IL_OP_EQ, (outs GPRF32:$dst),
- (ins GPRF32:$lhs, GPRF32:$rhs),
- !strconcat(IL_OP_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def FGE : TwoInOneOut<IL_OP_GE, (outs GPRF32:$dst),
- (ins GPRF32:$lhs, GPRF32:$rhs),
- !strconcat(IL_OP_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def FLT : TwoInOneOut<IL_OP_LT, (outs GPRF32:$dst),
- (ins GPRF32:$lhs, GPRF32:$rhs),
- !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def FLT_v2f32 : TwoInOneOut<IL_OP_LT, (outs GPRV2F32:$dst),
- (ins GPRV2F32:$lhs, GPRV2F32:$rhs),
- !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def FLT_v4f32 : TwoInOneOut<IL_OP_LT, (outs GPRV4F32:$dst),
- (ins GPRV4F32:$lhs, GPRV4F32:$rhs),
- !strconcat(IL_OP_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def FNE : TwoInOneOut<IL_OP_NE, (outs GPRF32:$dst),
- (ins GPRF32:$lhs, GPRF32:$rhs),
- !strconcat(IL_OP_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-
-//===---------------------------------------------------------------------===//
-//TODO: need to correctly define comparison instructions
-//===---------------------------------------------------------------------===//
-def DEQ : TwoInOneOut<IL_OP_D_EQ, (outs GPRF64:$dst),
- (ins GPRF64:$lhs, GPRF64:$rhs),
- !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def DEQ_v2f64 : TwoInOneOut<IL_OP_D_EQ, (outs GPRV2F64:$dst),
- (ins GPRV2F64:$lhs, GPRV2F64:$rhs),
- !strconcat(IL_OP_D_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def DGE : TwoInOneOut<IL_OP_D_GE, (outs GPRF64:$dst),
- (ins GPRF64:$lhs, GPRF64:$rhs),
- !strconcat(IL_OP_D_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def DLT : TwoInOneOut<IL_OP_D_LT, (outs GPRF64:$dst),
- (ins GPRF64:$lhs, GPRF64:$rhs),
- !strconcat(IL_OP_D_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def DNE : TwoInOneOut<IL_OP_D_NE, (outs GPRF64:$dst),
- (ins GPRF64:$lhs, GPRF64:$rhs),
- !strconcat(IL_OP_D_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-
-//===---------------------------------------------------------------------===//
-//TODO: need to correctly define comparison instructions
-//===---------------------------------------------------------------------===//
-def IEQ : TwoInOneOut<IL_OP_I_EQ, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def IEQ_v2i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def IEQ_v4i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def IGE : TwoInOneOut<IL_OP_I_GE, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def IGE_v2i32 : TwoInOneOut<IL_OP_I_GE, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def IGE_v4i32 : TwoInOneOut<IL_OP_I_GE, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_I_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def ILT : TwoInOneOut<IL_OP_I_LT, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def ILT_v2i32 : TwoInOneOut<IL_OP_I_LT, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def ILT_v4i32 : TwoInOneOut<IL_OP_I_LT, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_I_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def INE : TwoInOneOut<IL_OP_I_NE, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-def INE_v2i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-def INE_v4i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-let Predicates = [HasHW64Bit] in {
-def LEQ : TwoInOneOut<IL_OP_I64_EQ, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_I64_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def LGE : TwoInOneOut<IL_OP_I64_GE, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_I64_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def LLE : TwoInOneOut<IL_OP_I64_GE, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_I64_GE.Text, " $dst, $rhs, $lhs")
- , []>;
-def LGT : TwoInOneOut<IL_OP_I64_LT, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_I64_LT.Text, " $dst, $rhs, $lhs")
- , []>;
-def LLT : TwoInOneOut<IL_OP_I64_LT, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_I64_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def LNE : TwoInOneOut<IL_OP_I64_NE, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_I64_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-}
-
-//===---------------------------------------------------------------------===//
-// Unsigned Integer Operations
-//===---------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------===//
-//TODO: need to correctly define comparison instructions
-//===---------------------------------------------------------------------===//
-def UEQ : TwoInOneOut<IL_OP_I_EQ, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def UEQ_v2i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def UEQ_v4i32 : TwoInOneOut<IL_OP_I_EQ, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_I_EQ.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULE : TwoInOneOut<IL_OP_U_GE, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULE_v2i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULE_v4i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def UGT : TwoInOneOut<IL_OP_U_LT, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def UGT_v2i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def UGT_v4i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def UGE : TwoInOneOut<IL_OP_U_GE, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def UGE_v2i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def UGE_v4i32 : TwoInOneOut<IL_OP_U_GE, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_U_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULT : TwoInOneOut<IL_OP_U_LT, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULT_v2i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULT_v4i32 : TwoInOneOut<IL_OP_U_LT, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_U_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-def UNE : TwoInOneOut<IL_OP_I_NE, (outs GPRI32:$dst),
- (ins GPRI32:$lhs, GPRI32:$rhs),
- !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-def UNE_v2i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV2I32:$dst),
- (ins GPRV2I32:$lhs, GPRV2I32:$rhs),
- !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-def UNE_v4i32 : TwoInOneOut<IL_OP_I_NE, (outs GPRV4I32:$dst),
- (ins GPRV4I32:$lhs, GPRV4I32:$rhs),
- !strconcat(IL_OP_I_NE.Text, " $dst, $lhs, $rhs")
- , []>;
-let Predicates = [HasHW64Bit] in {
-def ULLE : TwoInOneOut<IL_OP_U64_GE, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_U64_GE.Text, " $dst, $rhs, $lhs")
- , []>;
-def ULGT : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_U64_LT.Text, " $dst, $rhs, $lhs")
- , []>;
-def ULGE : TwoInOneOut<IL_OP_U64_GE, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_U64_GE.Text, " $dst, $lhs, $rhs")
- , []>;
-def ULLT : TwoInOneOut<IL_OP_U64_LT, (outs GPRI64:$dst),
- (ins GPRI64:$lhs, GPRI64:$rhs),
- !strconcat(IL_OP_U64_LT.Text, " $dst, $lhs, $rhs")
- , []>;
-}
+defm EQ : BinaryCmpMCi32<IL_OP_I_EQ, seteq>,
+ BinaryCmpMCi64<IL_OP_I64_EQ, seteq>,
+ BinaryCmpMCf32<IL_OP_EQ, seteq>,
+ BinaryCmpMCf64<IL_OP_D_EQ, seteq>;
+defm OEQ : BinaryCmpMCf32<IL_OP_EQ, setoeq>,
+ BinaryCmpMCf64<IL_OP_D_EQ, setoeq>;
+defm GE : BinaryCmpMCi32<IL_OP_I_GE, setge>,
+ BinaryCmpMCi64<IL_OP_I64_GE, setge>,
+ BinaryCmpMCf32<IL_OP_GE, setge>,
+ BinaryCmpMCf64<IL_OP_D_GE, setge>;
+defm LT : BinaryCmpMCi32<IL_OP_I_LT, setlt>,
+ BinaryCmpMCf32<IL_OP_LT, setlt>,
+ BinaryCmpMCi64<IL_OP_I64_LT, setlt>,
+ BinaryCmpMCf64<IL_OP_D_LT, setlt>;
+defm OLT : BinaryCmpMCf32<IL_OP_LT, setolt>,
+ BinaryCmpMCf64<IL_OP_D_LT, setolt>;
+defm OGE : BinaryCmpMCf32<IL_OP_GE, setoge>,
+ BinaryCmpMCf64<IL_OP_D_GE, setoge>;
+defm ULT : BinaryCmpMCi32<IL_OP_U_LT, setult>,
+ BinaryCmpMCi64<IL_OP_U64_LT, setult>;
+defm UGE : BinaryCmpMCi32<IL_OP_U_GE, setuge>,
+ BinaryCmpMCi64<IL_OP_U64_GE, setuge>;
+defm NE : BinaryCmpMCi32<IL_OP_I_NE, setne>,
+ BinaryCmpMCi64<IL_OP_I64_NE, setne>,
+ BinaryCmpMCf32<IL_OP_NE, setne>,
+ BinaryCmpMCf64<IL_OP_D_NE, setne>;
+defm UNE : BinaryCmpMCf32<IL_OP_NE, setune>,
+ BinaryCmpMCf64<IL_OP_D_NE, setune>;
+/**/
+// NOTE: these inverted-comparison patterns are currently ENABLED (the /**/ and /* */ markers around this section are empty comments). If LLVM gains native expansion of these setcc conditions, disable the section by joining the markers into one /* ... */ block.
+defm GT : BinaryInvCmpMCi32<IL_OP_I_GE,setle>;
+defm LE : BinaryInvCmpMCi32<IL_OP_I_LT,setgt>;
+defm UGT : BinaryInvCmpMCi32<IL_OP_U_GE,setule>;
+defm ULE : BinaryInvCmpMCi32<IL_OP_U_LT,setugt>;
+defm GT : BinaryInvCmpMCi64<IL_OP_I64_GE,setle>;
+defm LE : BinaryInvCmpMCi64<IL_OP_I64_LT,setgt>;
+defm UGT : BinaryInvCmpMCi64<IL_OP_U64_GE,setule>;
+defm ULE : BinaryInvCmpMCi64<IL_OP_U64_LT,setugt>;
+defm GT : BinaryInvCmpMCf32<IL_OP_GE, setle>;
+defm LE : BinaryInvCmpMCf32<IL_OP_LT, setgt>;
+defm GT : BinaryInvCmpMCf64<IL_OP_D_GE,setle>;
+defm LE : BinaryInvCmpMCf64<IL_OP_D_LT,setgt>;
+defm OGT : BinaryInvCmpMCf32<IL_OP_GE, setole>;
+defm OLE : BinaryInvCmpMCf32<IL_OP_LT, setogt>;
+defm OGT : BinaryInvCmpMCf64<IL_OP_D_GE,setole>;
+defm OLE : BinaryInvCmpMCf64<IL_OP_D_LT,setogt>;
+defm UGT : BinaryInvCmpMCf32<IL_OP_GE, setule>;
+defm ULE : BinaryInvCmpMCf32<IL_OP_LT, setugt>;
+defm UGT : BinaryInvCmpMCf64<IL_OP_D_GE,setule>;
+defm ULE : BinaryInvCmpMCf64<IL_OP_D_LT,setugt>;
+/* */
+
+
//===---------------------------------------------------------------------===//
// Scalar ==> Scalar conversion functions
//===---------------------------------------------------------------------===//
-// f32 ==> f64
-def FTOD : UnaryOp<IL_OP_F_2_D, fextend, GPRF64, GPRF32>;
-// f64 ==> f32
-def DTOF : UnaryOp<IL_OP_D_2_F, IL_d2f, GPRF32, GPRF64>;
-// f32 ==> i32 signed
-def FTOI : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRI32, GPRF32>;
-def FTOI_v2i32 : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRV2I32, GPRV2F32>;
-def FTOI_v4i32 : UnaryOp<IL_OP_FTOI, fp_to_sint, GPRV4I32, GPRV4F32>;
-// i32 ==> f32 signed
-def ITOF : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRF32, GPRI32>;
-def ITOF_v2f32 : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRV2F32, GPRV2I32>;
-def ITOF_v4f32 : UnaryOp<IL_OP_ITOF, sint_to_fp, GPRV4F32, GPRV4I32>;
-// f32 ==> i32 unsigned
-def FTOU : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRI32, GPRF32>;
-def FTOU_v2i32 : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRV2I32, GPRV2F32>;
-def FTOU_v4i32 : UnaryOp<IL_OP_FTOU, fp_to_uint, GPRV4I32, GPRV4F32>;
-// i32 ==> f32 unsigned
-def UTOF : UnaryOp<IL_OP_UTOF, uint_to_fp, GPRF32, GPRI32>;
-def UTOF_v2f32 : UnaryOp<IL_OP_UTOF, uint_to_fp, GPRV2F32, GPRV2I32>;
-def UTOF_v4f32 : UnaryOp<IL_OP_UTOF, uint_to_fp, GPRV4F32, GPRV4I32>;
-let Predicates = [HasHWDoubleConv] in {
- // f64 ==> i32 signed
- def DTOI : UnaryOp<IL_OP_DTOI, fp_to_sint, GPRI32, GPRF64>;
- // i32 ==> f64 signed
- def ITOD : UnaryOp<IL_OP_ITOD, sint_to_fp, GPRF64, GPRI32>;
- // f64 ==> i32 unsigned
- def DTOU : UnaryOp<IL_OP_DTOU, fp_to_uint, GPRI32, GPRF64>;
- // i32 ==> f64 unsigned
- def UTOD : UnaryOp<IL_OP_UTOD, uint_to_fp, GPRF64, GPRI32>;
-}
-// Get upper 32 bits of f64
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def DHI : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+defm FTOD : ConvertOpMC<IL_OP_F_2_D, fextend, f64, GPRF64, f32, GPRF32, f32imm, fpimm>;
+defm DTOF : ConvertOpMC<IL_OP_D_2_F, IL_d2f, f32, GPRF32, f64, GPRF64, f64imm, fpimm>;
+defm FTOIi32 : ConvertOpMC<IL_OP_FTOI, fp_to_sint, i32, GPRI32, f32, GPRF32, f32imm, fpimm>;
+defm FTOIv2i32 : ConvertOpMC<IL_OP_FTOI, fp_to_sint, v2i32, GPRV2I32, v2f32, GPRV2F32, v2f32imm, fpimm>;
+defm FTOIv4i32 : ConvertOpMC<IL_OP_FTOI, fp_to_sint, v4i32, GPRV4I32, v4f32, GPRV4F32, v4f32imm, fpimm>;
+defm ITOFf32 : ConvertOpMC<IL_OP_ITOF, sint_to_fp, f32, GPRF32, i32, GPRI32, i32imm, imm>;
+defm ITOFv2f32 : ConvertOpMC<IL_OP_ITOF, sint_to_fp, v2f32, GPRV2F32, v2i32, GPRV2I32, v2i32imm, imm>;
+defm ITOFv4f32 : ConvertOpMC<IL_OP_ITOF, sint_to_fp, v4f32, GPRV4F32, v4i32, GPRV4I32, v4i32imm, imm>;
+defm FTOUi32 : ConvertOpMC<IL_OP_FTOU, fp_to_uint, i32, GPRI32, f32, GPRF32, f32imm, fpimm>;
+defm FTOUv2i32 : ConvertOpMC<IL_OP_FTOU, fp_to_uint, v2i32, GPRV2I32, v2f32, GPRV2F32, v2f32imm, fpimm>;
+defm FTOUv4i32 : ConvertOpMC<IL_OP_FTOU, fp_to_uint, v4i32, GPRV4I32, v4f32, GPRV4F32, v4f32imm, fpimm>;
+defm UTOFf32 : ConvertOpMC<IL_OP_UTOF, uint_to_fp, f32, GPRF32, i32, GPRI32, i32imm, imm>;
+defm UTOFv2f32 : ConvertOpMC<IL_OP_UTOF, uint_to_fp, v2f32, GPRV2F32, v2i32, GPRV2I32, v2i32imm, imm>;
+defm UTOFv4f32 : ConvertOpMC<IL_OP_UTOF, uint_to_fp, v4f32, GPRV4F32, v4i32, GPRV4I32, v4i32imm, imm>;
+defm DTOI : ConvertOpMC<IL_OP_DTOI, fp_to_sint, i32, GPRI32, f64, GPRF64, f64imm, fpimm>;
+defm ITOD : ConvertOpMC<IL_OP_ITOD, sint_to_fp, f64, GPRF64, i32, GPRI32, i32imm, imm>;
+defm DTOU : ConvertOpMC<IL_OP_DTOU, fp_to_uint, i32, GPRI32, f64, GPRF64, f64imm, fpimm>;
+defm UTOD : ConvertOpMC<IL_OP_UTOD, uint_to_fp, f64, GPRF64, i32, GPRI32, i32imm, imm>;
+let swizzle = 2 in {
+ def DHIf64r : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
(ins GPRF64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRI32:$dst, (IL_dcomphi GPRF64:$src))]>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def DHI_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ def DHIv2f64r : ILFormat<IL_OP_MOV, (outs GPRV2I32:$dst),
(ins GPRV2F64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRV2I32:$dst, (IL_dcomphi2 GPRV2F64:$src))]>;
-// Get lower 32 bits of f64
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def DLO : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+ def DLOf64r : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
(ins GPRF64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRI32:$dst, (IL_dcomplo GPRF64:$src))]>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def DLO_v2f64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ def DLOv2f64r : ILFormat<IL_OP_MOV, (outs GPRV2I32:$dst),
(ins GPRV2F64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRV2I32:$dst, (IL_dcomplo2 GPRV2F64:$src))]>;
-// Convert two 32 bit integers into a f64
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def DCREATE : TwoInOneOut<IL_OP_I_ADD, (outs GPRF64:$dst),
+}
+let swizzle = 6 in {
+ def DCREATEf64rr : ILFormat<IL_OP_I_ADD, (outs GPRF64:$dst),
(ins GPRI32:$src0, GPRI32:$src1),
!strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
[(set GPRF64:$dst, (IL_dcreate GPRI32:$src0, GPRI32:$src1))]>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def DCREATE_v2f64 : TwoInOneOut<IL_OP_I_ADD, (outs GPRV2F64:$dst),
+ def DCREATEv2f64rr : ILFormat<IL_OP_I_ADD, (outs GPRV2F64:$dst),
(ins GPRV2I32:$src0, GPRV2I32:$src1),
!strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
[(set GPRV2F64:$dst,
(IL_dcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
-// Get upper 32 bits of i64
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def LHI : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+}
+let swizzle = 2 in {
+ def LHIi64r : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
(ins GPRI64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRI32:$dst, (IL_lcomphi GPRI64:$src))]>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def LHI_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ def LHIv2i64r : ILFormat<IL_OP_MOV, (outs GPRV2I32:$dst),
(ins GPRV2I64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRV2I32:$dst, (IL_lcomphi2 GPRV2I64:$src))]>;
-// Get lower 32 bits of i64
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def LLO : OneInOneOut<IL_OP_MOV, (outs GPRI32:$dst),
+ def LLOi64r : ILFormat<IL_OP_MOV, (outs GPRI32:$dst),
(ins GPRI64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRI32:$dst, (IL_lcomplo GPRI64:$src))]>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def LLO_v2i64 : OneInOneOut<IL_OP_MOV, (outs GPRV2I32:$dst),
+ def LLOv2i64r : ILFormat<IL_OP_MOV, (outs GPRV2I32:$dst),
(ins GPRV2I64:$src),
!strconcat(IL_OP_MOV.Text, " $dst, $src"),
[(set GPRV2I32:$dst, (IL_lcomplo2 GPRV2I64:$src))]>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def HILO_BITOR_v4i16 : TwoInOneOut<IL_OP_I_OR, (outs GPRI32:$dst),
+}
+let swizzle = 6 in {
+ let swizzle = 1 in {
+ def HILO_BITORv4i16rr : ILFormat<IL_OP_I_OR, (outs GPRI32:$dst),
(ins GPRI32:$src, GPRI32:$src2),
!strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def HILO_BITOR_v2i32 : TwoInOneOut<IL_OP_I_OR, (outs GPRI32:$dst),
+ def HILO_BITORv2i32rr : ILFormat<IL_OP_I_OR, (outs GPRI32:$dst),
(ins GPRI32:$src, GPRI32:$src2),
!strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def HILO_BITOR_v2i64 : TwoInOneOut<IL_OP_I_OR, (outs GPRI64:$dst),
+ }
+ def HILO_BITORv2i64rr : ILFormat<IL_OP_I_OR, (outs GPRI64:$dst),
(ins GPRI64:$src, GPRI64:$src2),
!strconcat(IL_OP_I_OR.Text, " $dst, $src, $src2"), []>;
-// Convert two 32 bit integers into a i64
-// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def LCREATE : TwoInOneOut<IL_OP_I_ADD, (outs GPRI64:$dst),
+}
+let swizzle = 6 in {
+def LCREATEi64rr : ILFormat<IL_OP_I_ADD, (outs GPRI64:$dst),
(ins GPRI32:$src0, GPRI32:$src1),
!strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
[(set GPRI64:$dst, (IL_lcreate GPRI32:$src0, GPRI32:$src1))]>;
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-def LCREATE_v2i64 : TwoInOneOut<IL_OP_I_ADD, (outs GPRV2I64:$dst),
+def LCREATEv2i64rr : ILFormat<IL_OP_I_ADD, (outs GPRV2I64:$dst),
(ins GPRV2I32:$src0, GPRV2I32:$src1),
!strconcat(IL_OP_I_ADD.Text, " $dst, $src0, $src1"),
[(set GPRV2I64:$dst,
(IL_lcreate2 GPRV2I32:$src0, GPRV2I32:$src1))]>;
+}
//===---------------------------------------------------------------------===//
// Scalar ==> Vector conversion functions
//===---------------------------------------------------------------------===//
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
-defm VCREATE : UnaryOpMCVec<IL_OP_MOV, IL_vbuild>;
+defm VCREATE : VectorCreate<IL_OP_MOV, IL_vbuild>;
//===---------------------------------------------------------------------===//
// Vector ==> Scalar conversion functions
@@ -877,42 +487,41 @@
//===---------------------------------------------------------------------===//
// Bit conversion functions
//===---------------------------------------------------------------------===//
-defm IL_ASCHAR : BitConversion<IL_OP_MOV, GPRI8, IL_bitconv>;
-defm IL_ASSHORT : BitConversion<IL_OP_MOV, GPRI16, IL_bitconv>;
-defm IL_ASINT : BitConversion<IL_OP_MOV, GPRI32, IL_bitconv>;
-defm IL_ASFLOAT : BitConversion<IL_OP_MOV, GPRF32, IL_bitconv>;
-defm IL_ASDOUBLE : BitConversion<IL_OP_MOV, GPRF64, IL_bitconv>;
-defm IL_ASLONG : BitConversion<IL_OP_MOV, GPRI64, IL_bitconv>;
-defm IL_ASV2CHAR : BitConversion<IL_OP_MOV, GPRV2I8, IL_bitconv>;
-defm IL_ASV2SHORT : BitConversion<IL_OP_MOV, GPRV2I16, IL_bitconv>;
-defm IL_ASV2INT : BitConversion<IL_OP_MOV, GPRV2I32, IL_bitconv>;
-defm IL_ASV2FLOAT : BitConversion<IL_OP_MOV, GPRV2F32, IL_bitconv>;
-defm IL_ASV2DOUBLE : BitConversion<IL_OP_MOV, GPRV2F64, IL_bitconv>;
-defm IL_ASV2LONG : BitConversion<IL_OP_MOV, GPRV2I64, IL_bitconv>;
-defm IL_ASV4CHAR : BitConversion<IL_OP_MOV, GPRV4I8, IL_bitconv>;
-defm IL_ASV4SHORT : BitConversion<IL_OP_MOV, GPRV4I16, IL_bitconv>;
-defm IL_ASV4INT : BitConversion<IL_OP_MOV, GPRV4I32, IL_bitconv>;
-defm IL_ASV4FLOAT : BitConversion<IL_OP_MOV, GPRV4F32, IL_bitconv>;
+defm IL_ASCHAR : BitConversion<IL_OP_MOV, IL_bitconv, i8, GPRI8>;
+defm IL_ASSHORT : BitConversion<IL_OP_MOV, IL_bitconv, i16, GPRI16>;
+defm IL_ASINT : BitConversion<IL_OP_MOV, IL_bitconv, i32, GPRI32>;
+defm IL_ASFLOAT : BitConversion<IL_OP_MOV, IL_bitconv, f32, GPRF32>;
+defm IL_ASDOUBLE : BitConversion<IL_OP_MOV, IL_bitconv, f64, GPRF64>;
+defm IL_ASLONG : BitConversion<IL_OP_MOV, IL_bitconv, i64, GPRI64>;
+defm IL_ASV2CHAR : BitConversion<IL_OP_MOV, IL_bitconv, v2i8, GPRV2I8>;
+defm IL_ASV2SHORT : BitConversion<IL_OP_MOV, IL_bitconv, v2i16, GPRV2I16>;
+defm IL_ASV2INT : BitConversion<IL_OP_MOV, IL_bitconv, v2i32, GPRV2I32>;
+defm IL_ASV2FLOAT : BitConversion<IL_OP_MOV, IL_bitconv, v2f32, GPRV2F32>;
+defm IL_ASV2DOUBLE : BitConversion<IL_OP_MOV, IL_bitconv, v2f64, GPRV2F64>;
+defm IL_ASV2LONG : BitConversion<IL_OP_MOV, IL_bitconv, v2i64, GPRV2I64>;
+defm IL_ASV4CHAR : BitConversion<IL_OP_MOV, IL_bitconv, v4i8, GPRV4I8>;
+defm IL_ASV4SHORT : BitConversion<IL_OP_MOV, IL_bitconv, v4i16, GPRV4I16>;
+defm IL_ASV4INT : BitConversion<IL_OP_MOV, IL_bitconv, v4i32, GPRV4I32>;
+defm IL_ASV4FLOAT : BitConversion<IL_OP_MOV, IL_bitconv, v4f32, GPRV4F32>;
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// seperate pass
//===---------------------------------------------------------------------===//
let isTerminator = 1 in {
- def BRANCH : ILFormat<IL_PSEUDO_INST, (outs), (ins brtarget:$target),
- "; Pseudo unconditional branch instruction",
- [(br bb:$target)]>;
- defm BRANCH_COND : BranchConditional<IL_brcond>;
+ defm BRANCH : BranchMC<IL_brcond>;
}
//===---------------------------------------------------------------------===//
// return instructions
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
+ let swizzle = 63 in {
def RETURN : ILFormat<IL_OP_RET,(outs), (ins variable_ops),
IL_OP_RET.Text, []>;
def RETDYN : ILFormat<IL_OP_RET_DYN, (outs), (ins variable_ops),
IL_OP_RET_DYN.Text, [(IL_retflag)]>;
}
+}
//===---------------------------------------------------------------------===//
// Lower and raise the stack x amount
//===---------------------------------------------------------------------===//
@@ -932,11 +541,13 @@
R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32
] in {
- def CALL : UnaryOpNoRet<IL_OP_CALL, (outs),
+ let swizzle = 2 in {
+ def CALL : ILFormat<IL_OP_CALL, (outs),
(ins calltarget:$dst, variable_ops),
!strconcat(IL_OP_CALL.Text, " $dst"), []>;
}
}
+}
//===---------------------------------------------------------------------===//
@@ -988,8 +599,7 @@
defm CONTINUEC : BranchInstr2<IL_OP_CONTINUEC>;
}
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
- def TRAP : ILFormat<IL_OP_NOP, (outs), (ins),
- IL_OP_NOP.Text, [(trap)]>;
+ def TRAP : ILFormat<IL_OP_NOP, (outs), (ins), IL_OP_NOP.Text, [(trap)]>;
}
//===---------------------------------------------------------------------===//
@@ -1040,271 +650,167 @@
//------------- Synchronization Functions - OpenCL 6.11.9 -------------------//
//===---------------------------------------------------------------------===//
let isCall=1 in {
- def FENCE : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_lds_memory_gds",
- [(int_AMDIL_fence GPRI32:$flag)]>;
-
- def FENCE_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_lds",
- [(int_AMDIL_fence_local GPRI32:$flag)]>;
-
- def FENCE_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_memory",
- [(int_AMDIL_fence_global GPRI32:$flag)]>;
-
- def FENCE_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_memory_lds",
- [(int_AMDIL_fence_global_local GPRI32:$flag)]>;
-
- def FENCE_REGION : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_gds",
- [(int_AMDIL_fence_region GPRI32:$flag)]>;
-
- def FENCE_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_gds_lds",
- [(int_AMDIL_fence_region_local GPRI32:$flag)]>;
-
- def FENCE_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE, (outs), (ins GPRI32:$flag),
- "fence_gds_memory",
- [(int_AMDIL_fence_region_global GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_lds_memory_gds_mem_read_only",
- [(int_AMDIL_read_fence GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_lds_mem_read_only",
- [(int_AMDIL_read_fence_local GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_memory_mem_read_only",
- [(int_AMDIL_read_fence_global GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_memory_lds_mem_read_only",
- [(int_AMDIL_read_fence_global_local GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_gds_mem_read_only",
- [(int_AMDIL_read_fence_region GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_gds_lds_mem_read_only",
- [(int_AMDIL_read_fence_region_local GPRI32:$flag)]>;
-
- def FENCE_READ_ONLY_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_READ_ONLY, (outs), (ins GPRI32:$flag),
- "fence_gds_memory_mem_read_only",
- [(int_AMDIL_read_fence_region_global GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_lds_memory_gds_mem_write_only",
- [(int_AMDIL_write_fence GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_lds_mem_write_only",
- [(int_AMDIL_write_fence_local GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_memory_mem_write_only",
- [(int_AMDIL_write_fence_global GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY_GLOBAL_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_memory_lds_mem_write_only",
- [(int_AMDIL_write_fence_global_local GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY_REGION : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_gds_mem_write_only",
- [(int_AMDIL_write_fence_region GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY_REGION_LOCAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_gds_lds_mem_write_only",
- [(int_AMDIL_write_fence_region_local GPRI32:$flag)]>;
-
- def FENCE_WRITE_ONLY_REGION_GLOBAL : BinaryOpNoRet<IL_OP_FENCE_WRITE_ONLY, (outs), (ins GPRI32:$flag),
- "fence_gds_memory_mem_write_only",
- [(int_AMDIL_write_fence_region_global GPRI32:$flag)]>;
+ // The 'S' suffix is for the '_gws' flag.
+ // The 'L' suffix is for the '_lds' flag.
+ // The 'M' suffix is for the '_memory' flag.
+ // The 'G' suffix is for the '_gds' flag.
+ // The 'RO' suffix is for the '_read_only' flag.
+ // The 'WO' suffix is for the '_write_only' flag.
+ let gws = 1 in {
+ defm FENCE_S : Fence<IL_OP_FENCE_S, int_AMDIL_gws>;
+ defm FENCE_LS : Fence<IL_OP_FENCE_LS, int_AMDIL_gws_local>;
+ defm FENCE_MS : Fence<IL_OP_FENCE_MS, int_AMDIL_gws_global>;
+ defm FENCE_GS : Fence<IL_OP_FENCE_GS, int_AMDIL_gws_region>;
+ defm FENCE_LMS : Fence<IL_OP_FENCE_LMS, int_AMDIL_gws_global_local>;
+ defm FENCE_MGS : Fence<IL_OP_FENCE_MGS, int_AMDIL_gws_global_region>;
+ defm FENCE_LGS : Fence<IL_OP_FENCE_LGS, int_AMDIL_gws_local_region>;
+ }
+ defm FENCE : Fence<IL_OP_FENCE_LMG,
+ int_AMDIL_fence>;
+ defm FENCE_L : Fence<IL_OP_FENCE_L,
+ int_AMDIL_fence_local>;
+ defm FENCE_M : Fence<IL_OP_FENCE_M,
+ int_AMDIL_fence_global>;
+ defm FENCE_G : Fence<IL_OP_FENCE_G,
+ int_AMDIL_fence_region>;
+ defm FENCE_LM : Fence<IL_OP_FENCE_LM,
+ int_AMDIL_fence_global_local>;
+ defm FENCE_LG : Fence<IL_OP_FENCE_LG,
+ int_AMDIL_fence_region_local>;
+ defm FENCE_MG : Fence<IL_OP_FENCE_MG,
+ int_AMDIL_fence_region_global>;
+ defm FENCE_RO : Fence<IL_OP_FENCE_RO_LMG,
+ int_AMDIL_read_fence>;
+ defm FENCE_RO_L : Fence<IL_OP_FENCE_RO_L,
+ int_AMDIL_read_fence_local>;
+ defm FENCE_RO_M : Fence<IL_OP_FENCE_RO_M,
+ int_AMDIL_read_fence_global>;
+ defm FENCE_RO_G : Fence<IL_OP_FENCE_RO_G,
+ int_AMDIL_read_fence_region>;
+ defm FENCE_RO_LM : Fence<IL_OP_FENCE_RO_LM,
+ int_AMDIL_read_fence_global_local>;
+ defm FENCE_RO_LG : Fence<IL_OP_FENCE_RO_LG,
+ int_AMDIL_read_fence_region_local>;
+ defm FENCE_RO_MG : Fence<IL_OP_FENCE_RO_MG,
+ int_AMDIL_read_fence_region_global>;
+ defm FENCE_WO : Fence<IL_OP_FENCE_WO_LMG,
+ int_AMDIL_write_fence>;
+ defm FENCE_WO_L : Fence<IL_OP_FENCE_WO_L,
+ int_AMDIL_write_fence_local>;
+ defm FENCE_WO_M : Fence<IL_OP_FENCE_WO_M,
+ int_AMDIL_write_fence_global>;
+ defm FENCE_WO_G : Fence<IL_OP_FENCE_WO_G,
+ int_AMDIL_write_fence_region>;
+ defm FENCE_WO_LM : Fence<IL_OP_FENCE_WO_LM,
+ int_AMDIL_write_fence_global_local>;
+ defm FENCE_WO_LG : Fence<IL_OP_FENCE_WO_LG,
+ int_AMDIL_write_fence_region_local>;
+ defm FENCE_WO_MG : Fence<IL_OP_FENCE_WO_MG,
+ int_AMDIL_write_fence_region_global>;
}
let isReturn = 1 in {
- def EARLY_EXIT : UnaryOpNoRet<IL_OP_RET_LOGICALNZ, (outs),
- (ins GPRI32:$flag),
- !strconcat(IL_OP_RET_LOGICALNZ.Text, " $flag"),
- [(int_AMDIL_early_exit GPRI32:$flag)]>;
-}
-def MEDIA_UNPACK_0 : OneInOneOut<IL_OP_UNPACK_0, (outs GPRV4F32:$dst),
- (ins GPRV4I32:$src),
- !strconcat(IL_OP_UNPACK_0.Text, " $dst, $src"),
- [(set GPRV4F32:$dst,
- (v4f32 (int_AMDIL_media_unpack_byte_0 GPRV4I32:$src)))]>;
-def MEDIA_UNPACK_1 : OneInOneOut<IL_OP_UNPACK_1, (outs GPRV4F32:$dst),
- (ins GPRV4I32:$src),
- !strconcat(IL_OP_UNPACK_1.Text, " $dst, $src"),
- [(set GPRV4F32:$dst,
- (v4f32 (int_AMDIL_media_unpack_byte_1 GPRV4I32:$src)))]>;
-def MEDIA_UNPACK_2 : OneInOneOut<IL_OP_UNPACK_2, (outs GPRV4F32:$dst),
- (ins GPRV4I32:$src),
- !strconcat(IL_OP_UNPACK_2.Text, " $dst, $src"),
- [(set GPRV4F32:$dst,
- (v4f32 (int_AMDIL_media_unpack_byte_2 GPRV4I32:$src)))]>;
-def MEDIA_UNPACK_3 : OneInOneOut<IL_OP_UNPACK_3, (outs GPRV4F32:$dst),
- (ins GPRV4I32:$src),
- !strconcat(IL_OP_UNPACK_3.Text, " $dst, $src"),
- [(set GPRV4F32:$dst,
- (v4f32 (int_AMDIL_media_unpack_byte_3 GPRV4I32:$src)))]>;
-
-def SEMAPHORE_INIT : BinaryOpNoRet<IL_OP_SEMAPHORE_INIT, (outs),
- (ins MEM3232:$ptr, GPRI32:$val),
- !strconcat(IL_OP_SEMAPHORE_INIT.Text, "_id($ptr)_value($val)"),
- [(int_AMDIL_semaphore_init ADDR:$ptr, GPRI32:$val)]>;
-
-def SEMAPHORE_WAIT : UnaryOpNoRet<IL_OP_SEMAPHORE_WAIT, (outs),
- (ins MEM3232:$ptr),
- !strconcat(IL_OP_SEMAPHORE_WAIT.Text, "_id($ptr)"),
- [(int_AMDIL_semaphore_wait ADDR:$ptr)]>;
-
-def SEMAPHORE_SIGNAL : UnaryOpNoRet<IL_OP_SEMAPHORE_SIGNAL, (outs),
- (ins MEM3232:$ptr),
- !strconcat(IL_OP_SEMAPHORE_SIGNAL.Text, "_id($ptr)"),
- [(int_AMDIL_semaphore_signal ADDR:$ptr)]>;
-
-let hasIEEEFlag = 1 in {
- defm MIN3 : TernaryIntrinsicFloat<IL_OP_MIN3, int_AMDIL_min3>;
- defm MED3 : TernaryIntrinsicFloat<IL_OP_MED3, int_AMDIL_med3>;
- defm MAX3 : TernaryIntrinsicFloat<IL_OP_MAX3, int_AMDIL_max3>;
- defm IMIN3 : TernaryIntrinsicInt<IL_OP_I_MIN3, int_AMDIL_min3_i32>;
- defm IMED3 : TernaryIntrinsicInt<IL_OP_I_MED3, int_AMDIL_med3_i32>;
- defm IMAX3 : TernaryIntrinsicInt<IL_OP_I_MAX3, int_AMDIL_max3_i32>;
- defm UMIN3 : TernaryIntrinsicInt<IL_OP_U_MIN3, int_AMDIL_min3_u32>;
- defm UMED3 : TernaryIntrinsicInt<IL_OP_U_MED3, int_AMDIL_med3_u32>;
- defm UMAX3 : TernaryIntrinsicInt<IL_OP_U_MAX3, int_AMDIL_max3_u32>;
-}
-
-def CLASS_f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRI32:$dst),
- (ins GPRF32:$src, GPRI32:$flag),
- !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
- [(set GPRI32:$dst,
- (int_AMDIL_class GPRF32:$src, GPRI32:$flag))]>;
-def CLASS_v2f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRV2I32:$dst),
- (ins GPRV2F32:$src, GPRV2I32:$flag),
- !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
- [(set GPRV2I32:$dst,
- (int_AMDIL_class GPRV2F32:$src, GPRV2I32:$flag))]>;
-def CLASS_v4f32 : TwoInOneOut<IL_OP_CLASS, (outs GPRV4I32:$dst),
- (ins GPRV4F32:$src, GPRV4I32:$flag),
- !strconcat(IL_OP_CLASS.Text, " $dst, $src, $flag"),
- [(set GPRV4I32:$dst,
- (int_AMDIL_class GPRV4F32:$src, GPRV4I32:$flag))]>;
-def CLASS_f64 : TwoInOneOut<IL_OP_D_CLASS, (outs GPRI32:$dst),
- (ins GPRF64:$src, GPRI32:$flag),
- !strconcat(IL_OP_D_CLASS.Text, " $dst, $src, $flag"),
- [(set GPRI32:$dst,
- (int_AMDIL_class GPRF64:$src, GPRI32:$flag))]>;
-def CLASS_v2f64 : TwoInOneOut<IL_OP_D_CLASS, (outs GPRV2I32:$dst),
- (ins GPRV2F64:$src, GPRV2I32:$flag),
- !strconcat(IL_OP_D_CLASS.Text, " $dst, $src, $flag"),
- [(set GPRV2I32:$dst,
- (int_AMDIL_class GPRV2F64:$src, GPRV2I32:$flag))]>;
+ defm EARLY_EXIT : UnaryIntMCVoid<IL_OP_RET_LOGICALNZ, int_AMDIL_early_exit,
+ i32, GPRI32, i32imm, imm, " $src0">;
+}
+defm MEDIA_UNPACK_0 : ConvertIntMC<IL_OP_UNPACK_0,
+ int_AMDIL_media_unpack_byte_0, v4f32, GPRV4F32,
+ v4i32, GPRV4I32, v4i32imm, imm>;
+defm MEDIA_UNPACK_1 : ConvertIntMC<IL_OP_UNPACK_1,
+ int_AMDIL_media_unpack_byte_1, v4f32, GPRV4F32,
+ v4i32, GPRV4I32, v4i32imm, imm>;
+defm MEDIA_UNPACK_2 : ConvertIntMC<IL_OP_UNPACK_2,
+ int_AMDIL_media_unpack_byte_2, v4f32, GPRV4F32,
+ v4i32, GPRV4I32, v4i32imm, imm>;
+defm MEDIA_UNPACK_3 : ConvertIntMC<IL_OP_UNPACK_3,
+ int_AMDIL_media_unpack_byte_3, v4f32, GPRV4F32,
+ v4i32, GPRV4I32, v4i32imm, imm>;
+
+let ieee = 1 in {
+ defm MIN3 : TernaryIntMCf32<IL_OP_MIN3, int_AMDIL_min3>;
+ defm MED3 : TernaryIntMCf32<IL_OP_MED3, int_AMDIL_med3>;
+ defm MAX3 : TernaryIntMCf32<IL_OP_MAX3, int_AMDIL_max3>;
+ defm IMIN3 : TernaryIntMCInt<IL_OP_I_MIN3, int_AMDIL_min3_i32>;
+ defm IMED3 : TernaryIntMCInt<IL_OP_I_MED3, int_AMDIL_med3_i32>;
+ defm IMAX3 : TernaryIntMCInt<IL_OP_I_MAX3, int_AMDIL_max3_i32>;
+ defm UMIN3 : TernaryIntMCInt<IL_OP_U_MIN3, int_AMDIL_min3_u32>;
+ defm UMED3 : TernaryIntMCInt<IL_OP_U_MED3, int_AMDIL_med3_u32>;
+ defm UMAX3 : TernaryIntMCInt<IL_OP_U_MAX3, int_AMDIL_max3_u32>;
+}
+defm CLASS : ClassMC<int_AMDIL_class>;
defm FREXP_EXP : IntrConvertF32TOI32<IL_OP_FREXP_EXP, int_AMDIL_frexp_exp>;
-def FREXP_EXP_f64 : OneInOneOut<IL_OP_D_FREXP_EXP, (outs GPRI32:$dst),
- (ins GPRF64:$src),
- !strconcat(IL_OP_D_FREXP_EXP.Text, " $dst, $src"),
- [(set GPRI32:$dst,
- (int_AMDIL_frexp_exp GPRF64:$src))]>;
-def FREXP_EXP_v2f64 : OneInOneOut<IL_OP_D_FREXP_EXP, (outs GPRV2I32:$dst),
- (ins GPRV2F64:$src),
- !strconcat(IL_OP_D_FREXP_EXP.Text, " $dst, $src"),
- [(set GPRV2I32:$dst,
- (int_AMDIL_frexp_exp GPRV2F64:$src))]>;
-
-defm FREXP_MANT : UnaryIntrinsicFloat<IL_OP_FREXP_MANT, int_AMDIL_frexp_mant>;
-defm SAD16 : TernaryIntrinsicInt<IL_OP_SAD_U16, int_AMDIL_media_sad16>;
-defm SAD32 : TernaryIntrinsicInt<IL_OP_SAD_U32, int_AMDIL_media_sad32>;
+defm DFREXP_EXP : IntrConvertF64TOI32<IL_OP_D_FREXP_EXP, int_AMDIL_frexp_exp>;
-let hasZeroOpFlag = 1 in {
+defm FREXP_MANT : UnaryIntMCf32<IL_OP_FREXP_MANT, int_AMDIL_frexp_mant>;
+defm SAD16 : TernaryIntMCInt<IL_OP_SAD_U16, int_AMDIL_media_sad16>;
+defm SAD32 : TernaryIntMCInt<IL_OP_SAD_U32, int_AMDIL_media_sad32>;
+
+let zeroop = 1 in {
let mayLoad = 0, mayStore=0 in {
-defm DDIV_INT : BinaryIntrinsicDouble<IL_OP_D_DIV, int_AMDIL_div>;
+defm DDIV_INT : BinaryIntMCf64<IL_OP_D_DIV, int_AMDIL_div>;
defm DDIV : BinaryOpMCf64<IL_OP_D_DIV, fdiv>;
}
}
-defm FREXP_MANT : UnaryIntrinsicDouble<IL_OP_D_FREXP_MANT, int_AMDIL_frexp_mant>;
-
-def DTRIG_PREOP : TwoInOneOut<IL_OP_D_TRIG_PREOP, (outs GPRF64:$dst),
- (ins GPRF64:$src0, GPRF32:$src1),
- !strconcat(IL_OP_D_TRIG_PREOP.Text, " $dst, $src0, $src1"),
- [(set GPRF64:$dst,
- (int_AMDIL_trig_preop_f64 GPRF64:$src0, GPRF32:$src1))]>;
-
-
-def LDEXP_f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRF32:$dst),
- (ins GPRF32:$src, GPRI32:$src1),
- !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
- [(set GPRF32:$dst,
- (int_AMDIL_ldexp GPRF32:$src, GPRI32:$src1))]>;
-
-def LDEXP_v2f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRV2F32:$dst),
- (ins GPRV2F32:$src, GPRV2I32:$src1),
- !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
- [(set GPRV2F32:$dst,
- (int_AMDIL_ldexp GPRV2F32:$src, GPRV2I32:$src1))]>;
-
-def LDEXP_v4f32 : TwoInOneOut<IL_OP_LDEXP, (outs GPRV4F32:$dst),
- (ins GPRV4F32:$src, GPRV4I32:$src1),
- !strconcat(IL_OP_LDEXP.Text, " $dst, $src, $src1"),
- [(set GPRV4F32:$dst,
- (int_AMDIL_ldexp GPRV4F32:$src, GPRV4I32:$src1))]>;
-defm MSAD : TernaryIntrinsicInt<IL_OP_MSAD, int_AMDIL_media_msad>;
-
-def QSAD_i64 : ThreeInOneOut<IL_OP_QSAD, (outs GPRI64:$dst),
- (ins GPRI64:$src0, GPRI32:$src1, GPRI64:$src2),
- !strconcat(IL_OP_QSAD.Text, " $dst, $src0, $src1, $src2"),
- [(set GPRI64:$dst,
- (int_AMDIL_media_qsad GPRI64:$src0, GPRI32:$src1, GPRI64:$src2))]>;
-
-def MQSAD_i64 : ThreeInOneOut<IL_OP_MQSAD, (outs GPRI64:$dst),
- (ins GPRI64:$src0, GPRI32:$src1, GPRI64:$src2),
- !strconcat(IL_OP_MQSAD.Text, " $dst, $src0, $src1, $src2"),
- [(set GPRI64:$dst,
- (int_AMDIL_media_mqsad GPRI64:$src0, GPRI32:$src1, GPRI64:$src2))]>;
-
-defm ADD_RTE : BinaryIntrinsicFloat<IL_OP_ADD_RTE, int_AMDIL_add_rte>;
-defm ADD_RTP : BinaryIntrinsicFloat<IL_OP_ADD_RTP, int_AMDIL_add_rtp>;
-defm ADD_RTN : BinaryIntrinsicFloat<IL_OP_ADD_RTN, int_AMDIL_add_rtn>;
-defm ADD_RTZ : BinaryIntrinsicFloat<IL_OP_ADD_RTZ, int_AMDIL_add_rtz>;
-defm SUB_RTE : BinaryIntrinsicFloat<IL_OP_SUB_RTE, int_AMDIL_sub_rte>;
-defm SUB_RTP : BinaryIntrinsicFloat<IL_OP_SUB_RTP, int_AMDIL_sub_rtp>;
-defm SUB_RTN : BinaryIntrinsicFloat<IL_OP_SUB_RTN, int_AMDIL_sub_rtn>;
-defm SUB_RTZ : BinaryIntrinsicFloat<IL_OP_SUB_RTZ, int_AMDIL_sub_rtz>;
-defm MUL_RTE : BinaryIntrinsicFloat<IL_OP_MUL_RTE, int_AMDIL_mul_rte>;
-defm MUL_RTP : BinaryIntrinsicFloat<IL_OP_MUL_RTP, int_AMDIL_mul_rtp>;
-defm MUL_RTN : BinaryIntrinsicFloat<IL_OP_MUL_RTN, int_AMDIL_mul_rtn>;
-defm MUL_RTZ : BinaryIntrinsicFloat<IL_OP_MUL_RTZ, int_AMDIL_mul_rtz>;
-defm MAD_RTE : TernaryIntrinsicFloat<IL_OP_MAD_RTE, int_AMDIL_mad_rte>;
-defm MAD_RTP : TernaryIntrinsicFloat<IL_OP_MAD_RTP, int_AMDIL_mad_rtp>;
-defm MAD_RTN : TernaryIntrinsicFloat<IL_OP_MAD_RTN, int_AMDIL_mad_rtn>;
-defm MAD_RTZ : TernaryIntrinsicFloat<IL_OP_MAD_RTZ, int_AMDIL_mad_rtz>;
-defm FMA_RTE : TernaryIntrinsicFloat<IL_OP_FMA_RTE, int_AMDIL_fma_rte>;
-defm FMA_RTP : TernaryIntrinsicFloat<IL_OP_FMA_RTP, int_AMDIL_fma_rtp>;
-defm FMA_RTN : TernaryIntrinsicFloat<IL_OP_FMA_RTN, int_AMDIL_fma_rtn>;
-defm FMA_RTZ : TernaryIntrinsicFloat<IL_OP_FMA_RTZ, int_AMDIL_fma_rtz>;
-defm ADD_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTE, int_AMDIL_add_rte>;
-defm ADD_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTP, int_AMDIL_add_rtp>;
-defm ADD_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTN, int_AMDIL_add_rtn>;
-defm ADD_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_ADD_RTZ, int_AMDIL_add_rtz>;
-defm SUB_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTE, int_AMDIL_sub_rte>;
-defm SUB_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTP, int_AMDIL_sub_rtp>;
-defm SUB_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTN, int_AMDIL_sub_rtn>;
-defm SUB_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_SUB_RTZ, int_AMDIL_sub_rtz>;
-defm MUL_RTE : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTE, int_AMDIL_mul_rte>;
-defm MUL_RTP : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTP, int_AMDIL_mul_rtp>;
-defm MUL_RTN : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTN, int_AMDIL_mul_rtn>;
-defm MUL_RTZ : BinaryIntrinsicDoubleScalar<IL_OP_D_MUL_RTZ, int_AMDIL_mul_rtz>;
-defm MAD_RTE : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTE, int_AMDIL_mad_rte>;
-defm MAD_RTP : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTP, int_AMDIL_mad_rtp>;
-defm MAD_RTN : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTN, int_AMDIL_mad_rtn>;
-defm MAD_RTZ : TernaryIntrinsicDoubleScalar<IL_OP_D_MAD_RTZ, int_AMDIL_mad_rtz>;
-
+defm FREXP_MANT : UnaryIntMCf64<IL_OP_D_FREXP_MANT, int_AMDIL_frexp_mant>;
+defm DTRIG_PREOPf64 : BinaryIntMCFull<IL_OP_D_TRIG_PREOP,
+ int_AMDIL_trig_preop_f64, f64, GPRF64,
+ f64, GPRF64, f64imm, fpimm, f32, GPRF32, f32imm, fpimm>;
+
+defm LDEXP : BinaryIntMCFFI<IL_OP_LDEXP, int_AMDIL_ldexp>;
+defm MSAD : TernaryIntMCInt<IL_OP_MSAD, int_AMDIL_media_msad>;
+defm QSAD : TernaryIntLLIL64Scalar<IL_OP_QSAD, int_AMDIL_media_qsad>;
+defm MQSAD : TernaryIntLLIL64Scalar<IL_OP_MQSAD, int_AMDIL_media_mqsad>;
+
+defm ADD_RTE : BinaryIntMCf32<IL_OP_ADD_RTE, int_AMDIL_add_rte>;
+defm ADD_RTP : BinaryIntMCf32<IL_OP_ADD_RTP, int_AMDIL_add_rtp>;
+defm ADD_RTN : BinaryIntMCf32<IL_OP_ADD_RTN, int_AMDIL_add_rtn>;
+defm ADD_RTZ : BinaryIntMCf32<IL_OP_ADD_RTZ, int_AMDIL_add_rtz>;
+defm SUB_RTE : BinaryIntMCf32<IL_OP_SUB_RTE, int_AMDIL_sub_rte>;
+defm SUB_RTP : BinaryIntMCf32<IL_OP_SUB_RTP, int_AMDIL_sub_rtp>;
+defm SUB_RTN : BinaryIntMCf32<IL_OP_SUB_RTN, int_AMDIL_sub_rtn>;
+defm SUB_RTZ : BinaryIntMCf32<IL_OP_SUB_RTZ, int_AMDIL_sub_rtz>;
+defm MUL_RTE : BinaryIntMCf32<IL_OP_MUL_RTE, int_AMDIL_mul_rte>;
+defm MUL_RTP : BinaryIntMCf32<IL_OP_MUL_RTP, int_AMDIL_mul_rtp>;
+defm MUL_RTN : BinaryIntMCf32<IL_OP_MUL_RTN, int_AMDIL_mul_rtn>;
+defm MUL_RTZ : BinaryIntMCf32<IL_OP_MUL_RTZ, int_AMDIL_mul_rtz>;
+defm MAD_RTE : TernaryIntMCf32<IL_OP_MAD_RTE, int_AMDIL_mad_rte>;
+defm MAD_RTP : TernaryIntMCf32<IL_OP_MAD_RTP, int_AMDIL_mad_rtp>;
+defm MAD_RTN : TernaryIntMCf32<IL_OP_MAD_RTN, int_AMDIL_mad_rtn>;
+defm MAD_RTZ : TernaryIntMCf32<IL_OP_MAD_RTZ, int_AMDIL_mad_rtz>;
+defm FMA_RTE : TernaryIntMCf32<IL_OP_FMA_RTE, int_AMDIL_fma_rte>;
+defm FMA_RTP : TernaryIntMCf32<IL_OP_FMA_RTP, int_AMDIL_fma_rtp>;
+defm FMA_RTN : TernaryIntMCf32<IL_OP_FMA_RTN, int_AMDIL_fma_rtn>;
+defm FMA_RTZ : TernaryIntMCf32<IL_OP_FMA_RTZ, int_AMDIL_fma_rtz>;
+defm ADD_RTE : BinaryIntMCf64Scalar<IL_OP_D_ADD_RTE, int_AMDIL_add_rte>;
+defm ADD_RTP : BinaryIntMCf64Scalar<IL_OP_D_ADD_RTP, int_AMDIL_add_rtp>;
+defm ADD_RTN : BinaryIntMCf64Scalar<IL_OP_D_ADD_RTN, int_AMDIL_add_rtn>;
+defm ADD_RTZ : BinaryIntMCf64Scalar<IL_OP_D_ADD_RTZ, int_AMDIL_add_rtz>;
+defm SUB_RTE : BinaryIntMCf64Scalar<IL_OP_D_SUB_RTE, int_AMDIL_sub_rte>;
+defm SUB_RTP : BinaryIntMCf64Scalar<IL_OP_D_SUB_RTP, int_AMDIL_sub_rtp>;
+defm SUB_RTN : BinaryIntMCf64Scalar<IL_OP_D_SUB_RTN, int_AMDIL_sub_rtn>;
+defm SUB_RTZ : BinaryIntMCf64Scalar<IL_OP_D_SUB_RTZ, int_AMDIL_sub_rtz>;
+defm MUL_RTE : BinaryIntMCf64Scalar<IL_OP_D_MUL_RTE, int_AMDIL_mul_rte>;
+defm MUL_RTP : BinaryIntMCf64Scalar<IL_OP_D_MUL_RTP, int_AMDIL_mul_rtp>;
+defm MUL_RTN : BinaryIntMCf64Scalar<IL_OP_D_MUL_RTN, int_AMDIL_mul_rtn>;
+defm MUL_RTZ : BinaryIntMCf64Scalar<IL_OP_D_MUL_RTZ, int_AMDIL_mul_rtz>;
+defm MAD_RTE : TernaryIntMCf64Scalar<IL_OP_D_MAD_RTE, int_AMDIL_mad_rte>;
+defm MAD_RTP : TernaryIntMCf64Scalar<IL_OP_D_MAD_RTP, int_AMDIL_mad_rtp>;
+defm MAD_RTN : TernaryIntMCf64Scalar<IL_OP_D_MAD_RTN, int_AMDIL_mad_rtn>;
+defm MAD_RTZ : TernaryIntMCf64Scalar<IL_OP_D_MAD_RTZ, int_AMDIL_mad_rtz>;
+
+def PRINT_GET_ITEM_OFFSET : ILFormat<IL_OP_MOV, (outs GPRXI32:$dst),
+ (ins), !strconcat(IL_OP_MOV.Text, " $dst, r1040.x"),
+ [(set GPRXI32:$dst, (int_AMDIL_get_printf_item_offset))]>;
+let isCall = 1 in {
+def PRINT_INC_ITEM_OFFSETr : ILFormat<IL_OP_I_ADD, (outs),
+ (ins GPRI32:$src), !strconcat(IL_OP_I_ADD.Text, " r1040.x, r1040.x, $src"),
+ [(int_AMDIL_inc_printf_item_offset GPRI32:$src)]>;
+def PRINT_INC_ITEM_OFFSETi : ILFormat<IL_OP_I_ADD, (outs),
+ (ins i32imm:$src), !strconcat(IL_OP_I_ADD.Text, " r1040.x, r1040.x, $src"),
+ [(int_AMDIL_inc_printf_item_offset (i32 imm:$src))]>;
+}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp Wed Sep 12 12:43:34 2012
@@ -28,7 +28,6 @@
: TargetIntrinsicInfo(), mTM(tm)
{
}
-
std::string
AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
unsigned int numTys) const
@@ -50,7 +49,6 @@
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
return Result;
}
-
static bool
checkTruncation(const char *Name, unsigned int& Len)
{
@@ -80,7 +78,6 @@
}
return false;
}
-
// We don't want to support both the OpenCL 1.0 atomics
// and the 1.1 atomics with different names, so we translate
// the 1.0 atomics to the 1.1 naming here if needed.
@@ -102,7 +99,6 @@
buffer[Len] = '\0';
return buffer;
}
-
unsigned int
AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
{
@@ -110,7 +106,7 @@
#include "AMDILGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
AMDILIntrinsic::ID IntrinsicID
- = (AMDILIntrinsic::ID)Intrinsic::not_intrinsic;
+ = (AMDILIntrinsic::ID)Intrinsic::not_intrinsic;
if (checkTruncation(Name, Len)) {
char *buffer = atomTranslateIfNeeded(Name, Len);
IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", buffer);
@@ -118,15 +114,11 @@
} else {
IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
}
- if (!isValidIntrinsic(IntrinsicID)) {
- return 0;
- }
if (IntrinsicID != (AMDILIntrinsic::ID)Intrinsic::not_intrinsic) {
return IntrinsicID;
}
return 0;
}
-
bool
AMDILIntrinsicInfo::isOverloaded(unsigned IntrID) const
{
@@ -138,7 +130,6 @@
#include "AMDILGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
-
/// This defines the "getAttributes(ID id)" method.
#define GET_INTRINSIC_ATTRIBUTES
#include "AMDILGenIntrinsics.inc"
@@ -167,27 +158,8 @@
}
return cast<Function>(M->getOrInsertFunction(getName(IntrID),
- FunctionType::get(ResultTy, ArgTys, IsVarArg),
- AList));
-}
-
-/// Because the code generator has to support different SC versions,
-/// this function is added to check that the intrinsic being used
-/// is actually valid. In the case where it isn't valid, the
-/// function call is not translated into an intrinsic and the
-/// fall back software emulated path should pick up the result.
-bool
-AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const
-{
- const AMDILSubtarget *stm = mTM->getSubtargetImpl();
- switch (IntrID) {
- default:
- return true;
- case AMDILIntrinsic::AMDIL_convert_f32_i32_rpi:
- case AMDILIntrinsic::AMDIL_convert_f32_i32_flr:
- case AMDILIntrinsic::AMDIL_convert_f32_f16_near:
- case AMDILIntrinsic::AMDIL_convert_f32_f16_neg_inf:
- case AMDILIntrinsic::AMDIL_convert_f32_f16_plus_inf:
- return stm->calVersion() >= CAL_VERSION_SC_139;
- };
+ FunctionType::get(ResultTy,
+ ArgTys,
+ IsVarArg),
+ AList));
}
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsicInfo.h Wed Sep 12 12:43:34 2012
@@ -17,11 +17,9 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Intrinsics.h"
-namespace llvm
-{
+namespace llvm {
class AMDILTargetMachine;
-namespace AMDILIntrinsic
-{
+namespace AMDILIntrinsic {
enum ID {
last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
@@ -29,13 +27,10 @@
#undef GET_INTRINSIC_ENUM_VALUES
, num_AMDIL_intrinsics
};
-
}
-
-class AMDILIntrinsicInfo : public TargetIntrinsicInfo
-{
- AMDILTargetMachine *mTM;
+class AMDILIntrinsicInfo : public TargetIntrinsicInfo {
+AMDILTargetMachine *mTM;
public:
AMDILIntrinsicInfo(AMDILTargetMachine *tm);
std::string getName(unsigned int IntrId, Type **Tys = 0,
@@ -46,7 +41,7 @@
Type **Tys = 0,
unsigned int numTys = 0) const;
bool isValidIntrinsic(unsigned int) const;
-}; // AMDILIntrinsicInfo
+}; // AMDILIntrinsicInfo
}
#endif // _AMDIL_INTRINSICS_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILIntrinsics.td Wed Sep 12 12:43:34 2012
@@ -13,6 +13,20 @@
let TargetPrefix = "AMDIL", isTarget = 1 in {
//------------- Synchronization Functions - OpenCL 6.11.9 --------------------//
+ def int_AMDIL_gws : GCCBuiltin<"__amdil_gws">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_gws_global : GCCBuiltin<"__amdil_gws_global">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_gws_local : GCCBuiltin<"__amdil_gws_local">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_gws_region : GCCBuiltin<"__amdil_gws_region">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_gws_global_local : GCCBuiltin<"__amdil_gws_global_local">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_gws_global_region : GCCBuiltin<"__amdil_gws_global_region">,
+ UnaryIntNoRetInt;
+ def int_AMDIL_gws_local_region : GCCBuiltin<"__amdil_gws_local_region">,
+ UnaryIntNoRetInt;
def int_AMDIL_fence : GCCBuiltin<"__amdil_mem_fence">,
UnaryIntNoRetInt;
def int_AMDIL_fence_global : GCCBuiltin<"__amdil_mem_fence_global">,
@@ -61,11 +75,11 @@
def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">,
UnaryIntNoRetInt;
+let Properties = [IntrNoMem] in {
def int_AMDIL_cmov_logical : GCCBuiltin<"__amdil_cmov_logical">,
TernaryIntInt;
def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat;
def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
-
def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
TernaryIntInt;
def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
@@ -116,16 +130,23 @@
BinaryIntInt;
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
BinaryIntInt;
+}
def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
- BinaryIntInt;
+ BinaryIntInt;
def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
BinaryIntInt;
+
+let Properties = [IntrNoMem] in {
def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
BinaryIntFloat;
+}
+
def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
- BinaryIntInt;
+ BinaryIntInt;
def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
- BinaryIntInt;
+ BinaryIntInt;
+
+let Properties = [IntrNoMem] in {
def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
BinaryIntFloat;
def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
@@ -164,12 +185,20 @@
UnaryIntFloat;
def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
UnaryIntFloat;
+}
+
def int_AMDIL_ftz : GCCBuiltin<"__amdil_ftz">,
UnaryIntFloat;
+
+let Properties = [IntrNoMem] in {
def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
def int_AMDIL_div_precise : GCCBuiltin<"__amdil_div_precise">, BinaryIntFloat;
+}
+
def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
+
+let Properties = [IntrNoMem] in {
def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
UnaryIntFloat;
def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
@@ -198,52 +227,56 @@
UnaryIntFloat;
def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
TernaryIntFloat;
- def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i32_ty], []>;
+}
+
+ def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_i32_ty], []>;
- def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
- Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
+ def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+
def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
- ConvertIntITOF;
+ ConvertIntITOF;
def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
- ConvertIntFTOI;
+ ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
- ConvertIntFTOI;
+ ConvertIntFTOI;
def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
- ConvertIntFTOI;
+ ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
- ConvertIntFTOI;
+ ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
- ConvertIntFTOI;
+ ConvertIntFTOI;
def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
- ConvertIntFTOI;
- def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
+ ConvertIntFTOI;
+ def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
- ConvertIntITOF;
+ ConvertIntITOF;
def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
- ConvertIntITOF;
+ ConvertIntITOF;
def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
- ConvertIntITOF;
+ ConvertIntITOF;
def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
- ConvertIntITOF;
+ ConvertIntITOF;
def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty, llvm_float_ty], []>;
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty, llvm_float_ty], []>;
def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty], []>;
+ Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
+ llvm_v2f32_ty], []>;
def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
+ Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], []>;
//===---------------------- Image functions begin ------------------------===//
def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
@@ -260,7 +293,7 @@
def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
- def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
+ def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
def int_AMDIL_image1d_array_read_norm : GCCBuiltin<"__amdil_image1d_array_read_norm">,
@@ -275,7 +308,7 @@
def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">,
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>;
- def int_AMDIL_image1d_buffer_write : GCCBuiltin<"__amdil_image1d_buffer_write">,
+ def int_AMDIL_image1d_buffer_write : GCCBuiltin<"__amdil_image1d_buffer_write">,
Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>;
def int_AMDIL_image1d_buffer_load : GCCBuiltin<"__amdil_image1d_buffer_load">,
@@ -346,25 +379,29 @@
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>;
def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_global_size : GCCBuiltin<"__amdil_get_global_size_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">,
- Intrinsic<[llvm_v4i32_ty], [], []>;
+ Intrinsic<[llvm_v4i32_ty], [], []>;
def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">,
- Intrinsic<[llvm_i32_ty], [], []>;
+ Intrinsic<[llvm_i32_ty], [], []>;
def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">,
- Intrinsic<[llvm_anyint_ty], []>;
+ Intrinsic<[llvm_anyint_ty], []>;
def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">,
- Intrinsic<[llvm_anyint_ty], []>;
+ Intrinsic<[llvm_anyint_ty], []>;
+ def int_AMDIL_get_printf_item_offset : GCCBuiltin<"__amdil_get_printf_item_offset">,
+ Intrinsic<[llvm_anyint_ty], []>;
+ def int_AMDIL_inc_printf_item_offset : GCCBuiltin<"__amdil_inc_printf_item_offset">,
+ Intrinsic<[], [llvm_anyint_ty]>;
/// Intrinsics for atomic instructions with no return value
/// Signed 32 bit integer atomics for global address space
@@ -393,7 +430,73 @@
def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">,
BinaryAtomicIntNoRet;
-
+// atomic stores
+def int_AMDIL_atomic_store_gv4i32
+ : GCCBuiltin<"__atomic_store_explicit_gv4i32">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gv4u32
+ : GCCBuiltin<"__atomic_store_explicit_gv4u32">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gv2i32
+ : GCCBuiltin<"__atomic_store_explicit_gv2i32">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gv2u32
+ : GCCBuiltin<"__atomic_store_explicit_gv2u32">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gi64 : GCCBuiltin<"__atomic_store_explicit_gi64">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gu64 : GCCBuiltin<"__atomic_store_explicit_gu64">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gi32 : GCCBuiltin<"__atomic_store_explicit_gi32">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gu32 : GCCBuiltin<"__atomic_store_explicit_gu32">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gi16 : GCCBuiltin<"__atomic_store_explicit_gi16">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gu16 : GCCBuiltin<"__atomic_store_explicit_gu16">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i16_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gi8 : GCCBuiltin<"__atomic_store_explicit_gi8">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+def int_AMDIL_atomic_store_gu8 : GCCBuiltin<"__atomic_store_explicit_gu8">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_i8_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
+
+// atomic loads
+def int_AMDIL_atomic_load_gv4i32 : GCCBuiltin<"__atomic_load_explicit_gv4i32">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gv4u32 : GCCBuiltin<"__atomic_load_explicit_gv4u32">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gv2i32 : GCCBuiltin<"__atomic_load_explicit_gv2i32">,
+ Intrinsic<[llvm_v2i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gv2u32 : GCCBuiltin<"__atomic_load_explicit_gv2u32">,
+ Intrinsic<[llvm_v2i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gi64 : GCCBuiltin<"__atomic_load_explicit_gi64">,
+ Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gu64 : GCCBuiltin<"__atomic_load_explicit_gu64">,
+ Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gi32 : GCCBuiltin<"__atomic_load_explicit_gi32">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gu32 : GCCBuiltin<"__atomic_load_explicit_gu32">,
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gi16 : GCCBuiltin<"__atomic_load_explicit_gi16">,
+ Intrinsic<[llvm_i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gu16 : GCCBuiltin<"__atomic_load_explicit_gu16">,
+ Intrinsic<[llvm_i16_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gi8 : GCCBuiltin<"__atomic_load_explicit_gi8">,
+ Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
+def int_AMDIL_atomic_load_gu8 : GCCBuiltin<"__atomic_load_explicit_gu8">,
+ Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
/// Unsigned 32 bit integer atomics for global address space
def int_AMDIL_atomic_add_gu32_noret : GCCBuiltin<"__atomic_add_gu32_noret">,
@@ -598,13 +701,13 @@
BinaryAtomicIntNoRet;
def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">,
- VoidIntLong;
+ VoidIntLong;
def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">,
- VoidIntInt;
+ VoidIntInt;
def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">,
- VoidIntInt;
+ VoidIntInt;
/// Intrinsics for atomic instructions with a return value
@@ -730,14 +833,12 @@
def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">,
BinaryAtomicInt;
-/// Semaphore signal/wait/init
-def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">,
- UnaryAtomicIntNoRet;
+/// Semaphore signal/wait
def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">,
VoidAtomicIntNoRet;
def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">,
VoidAtomicIntNoRet;
-def int_AMDIL_semaphore_size : GCCBuiltin<"__amdil_max_semaphore_size">,
+def int_AMDIL_max_semaphore_size : GCCBuiltin<"__amdil_max_semaphore_size">,
VoidIntInt;
/// Intrinsics for atomic instructions with no return value
@@ -1095,20 +1196,20 @@
def int_AMDIL_med3_u32 : GCCBuiltin<"__amdil_umed3">,
TernaryIntInt;
def int_AMDIL_med3 : GCCBuiltin<"__amdil_med3">,
- TernaryIntFloat;
- def int_AMDIL_class : GCCBuiltin<"__amdil_class">,
- Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
- def int_AMDIL_trig_preop_f64 : GCCBuiltin<"__amdil_trig_preop">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_float_ty], []>;
+ TernaryIntFloat;
+def int_AMDIL_class : GCCBuiltin<"__amdil_class">,
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
+def int_AMDIL_trig_preop_f64 : GCCBuiltin<"__amdil_trig_preop">,
+ Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_float_ty], []>;
def int_AMDIL_frexp_exp : GCCBuiltin<"__amdil_frexp_exp">,
- ConvertIntFTOI;
+ ConvertIntFTOI;
def int_AMDIL_frexp_mant : GCCBuiltin<"__amdil_frexp_mant">,
- UnaryIntFloat;
+ UnaryIntFloat;
def int_AMDIL_media_sad16 : GCCBuiltin<"__amdil_sad16">,
- TernaryIntInt;
+ TernaryIntInt;
def int_AMDIL_media_sad32 : GCCBuiltin<"__amdil_sad32">,
- TernaryIntInt;
+ TernaryIntInt;
def int_AMDIL_media_msad : GCCBuiltin<"__amdil_msad">,
TernaryIntInt;
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernel.h Wed Sep 12 12:43:34 2012
@@ -20,17 +20,20 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunction.h"
-namespace llvm
-{
+namespace llvm {
class AMDILSubtarget;
class AMDILTargetMachine;
/// structure that holds information for a single local/region address array
typedef struct _AMDILArrayMemRec {
- uint32_t vecSize; // size of each vector
- uint32_t offset; // offset into the memory section
- uint32_t align; // alignment
- bool isHW; // flag to specify if HW is used or SW is used
- bool isRegion; // flag to specify if GDS is used or not
+ const Value* base;
+ uint32_t vecSize; // size of each vector
+ uint32_t offset; // offset into the memory section
+ uint32_t align; // alignment
+ // ID of the local buffer this array resides in.
+ // Currently only used for hardware supported local buffers.
+ uint32_t resourceID;
+ bool isHW; // flag to specify if HW is used or SW is used
+ bool isRegion; // flag to specify if GDS is used or not
} AMDILArrayMem;
/// structure that holds information about a constant address
@@ -39,11 +42,11 @@
const llvm::Value *base;
uint32_t size;
uint32_t offset;
- uint32_t align; // alignment
- uint32_t cbNum; // value of 0 means that it does not use hw CB
- bool isArray; // flag to specify that this is an array
- bool isArgument; // flag to specify that this is for a kernel argument
- bool usesHardware; // flag to specify if hardware CB is used or not
+ uint32_t align; // alignment
+ uint32_t cbNum; // value of 0 means that it does not use hw CB
+ bool isArray; // flag to specify that this is an array
+ bool isArgument; // flag to specify that this is for a kernel argument
+ bool usesHardware; // flag to specify if hardware CB is used or not
std::string name;
} AMDILConstPtr;
@@ -51,23 +54,20 @@
/// arrays in the kernel
typedef struct _AMDILLocalArgRec {
llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS> local;
- std::string name; // Kernel Name
+ std::string name; // Kernel Name
} AMDILLocalArg;
-
-
/// Structure that holds information for each kernel argument
typedef struct _AMDILkernelArgRec {
- uint32_t reqGroupSize[3]; // x,y,z sizes for group.
- uint32_t reqRegionSize[3]; // x,y,z sizes for region.
- llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo; // information about argumetns.
- bool mHasRWG; // true if reqd_work_group_size is specified.
- bool mHasRWR; // true if reqd_work_region_size is specified.
+ uint32_t reqGroupSize[3]; // x,y,z sizes for group.
+ uint32_t reqRegionSize[3]; // x,y,z sizes for region.
+ llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo; // information about arguments.
+ bool mHasRWG; // true if reqd_work_group_size is specified.
+ bool mHasRWR; // true if reqd_work_region_size is specified.
} AMDILKernelAttr;
/// Structure that holds information for each kernel
-class AMDILKernel
-{
+class AMDILKernel {
public:
AMDILKernel(const std::string& name, bool clKernel) :
curSize(0),
@@ -82,24 +82,26 @@
rvgv(NULL) {
memset(constSizes, 0, sizeof(constSizes));
}
- uint32_t curSize; // Current size of local memory, hardware + software emulated
- uint32_t curRSize; // Current size of region memory, hardware + software emulated
- uint32_t curHWSize; // Current size of hardware local memory
- uint32_t curHWRSize; // Current size of hardware region memory
- uint32_t constSize; // Current size of software constant memory
- bool mKernel; // Flag to specify if this represents an OpenCL kernel or not
- std::string mName; // Name of current kernel
- AMDILKernelAttr *sgv; // pointer to kernel attributes
- AMDILLocalArg *lvgv; // pointer to local attributes
- AMDILLocalArg *rvgv; // pointer to region attributes
- llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS> constPtr; // vector containing constant pointer information
- uint32_t constSizes[HW_MAX_NUM_CB]; // Size of each constant buffer
- llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly; // set that specifies the read-only images for the kernel
- llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly; // set that specifies the write-only images for the kernel
+ uint32_t curSize; // Current size of local memory, hardware + software emulated
+ uint32_t curRSize; // Current size of region memory, hardware + software emulated
+ uint32_t curHWSize; // Current size of hardware local memory
+ uint32_t curHWRSize; // Current size of hardware region memory
+ uint32_t constSize; // Current size of software constant memory
+ bool mKernel; // Flag to specify if this represents an OpenCL kernel or not
+ std::string mName; // Name of current kernel
+ AMDILKernelAttr *sgv; // pointer to kernel attributes
+ AMDILLocalArg *lvgv; // pointer to local attributes
+ AMDILLocalArg *rvgv; // pointer to region attributes
+ llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS> constPtr; // vector containing constant pointer information
+ uint32_t constSizes[HW_MAX_NUM_CB]; // Size of each constant buffer
+ llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly; // set that specifies the read-only images for the kernel
+ llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly; // set that specifies the write-only images for the kernel
llvm::SmallVector<std::pair<uint32_t, const llvm::Constant *>,
- DEFAULT_VEC_SLOTS> CPOffsets; // Vector of constant pool offsets
- typedef llvm::SmallVector<struct _AMDILConstPtrRec, DEFAULT_VEC_SLOTS>::iterator constptr_iterator; // iterator for constant pointers
- typedef llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator arraymem_iterator; // iterator for the memory array
-}; // AMDILKernel
+ DEFAULT_VEC_SLOTS> CPOffsets; // Vector of constant pool offsets
+ typedef llvm::SmallVector<struct _AMDILConstPtrRec,
+ DEFAULT_VEC_SLOTS>::iterator constptr_iterator; // iterator for constant pointers
+ typedef llvm::SmallVector<AMDILArrayMem *,
+ DEFAULT_VEC_SLOTS>::iterator arraymem_iterator; // iterator for the memory array
+}; // AMDILKernel
} // end llvm namespace
#endif // _AMDIL_KERNEL_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.cpp Wed Sep 12 12:43:34 2012
@@ -42,7 +42,11 @@
#define NUM_EXTRA_SLOTS_PER_IMAGE 1
void
-printRegName(AMDILAsmPrinter *RegNames, unsigned reg, OSTREAM_TYPE &O, bool dst, bool dupe = false)
+printRegName(AMDILAsmPrinter *RegNames,
+ unsigned reg,
+ OSTREAM_TYPE &O,
+ bool dst,
+ bool dupe = false)
{
if (reg >= AMDIL::Rx1 && reg < AMDIL::Rxy1) {
O << RegNames->getRegisterName(reg) << ".x,";
@@ -104,8 +108,7 @@
};
}
}
-static bool errorPrint(const char *ptr, OSTREAM_TYPE &O)
-{
+static bool errorPrint(const char *ptr, OSTREAM_TYPE &O) {
if (ptr[0] == 'E') {
O << ";error:" << ptr << "\n";
} else {
@@ -113,36 +116,30 @@
}
return false;
}
-static bool semaPrint(uint32_t val, OSTREAM_TYPE &O)
-{
+static bool semaPrint(uint32_t val, OSTREAM_TYPE &O) {
O << "dcl_semaphore_id(" << val << ")\n";
return false;
}
-static bool arenaPrint(uint32_t val, OSTREAM_TYPE &O)
-{
+static bool arenaPrint(uint32_t val, OSTREAM_TYPE &O) {
if (val >= ARENA_SEGMENT_RESERVED_UAVS) {
O << "dcl_arena_uav_id(" << val << ")\n";
}
return false;
}
-
-static bool uavPrint(uint32_t val, OSTREAM_TYPE &O)
-{
+static bool uavPrint(uint32_t val, OSTREAM_TYPE &O) {
if (val < 8 || val == 11) {
O << "dcl_raw_uav_id(" << val << ")\n";
}
return false;
}
-
-static bool uavPrintSI(uint32_t val, OSTREAM_TYPE &O)
-{
- O << "dcl_typeless_uav_id(" << val << ")_stride(4)_length(4)_access(read_write)\n";
+static bool uavPrintSI(uint32_t val, OSTREAM_TYPE &O) {
+ O << "dcl_typeless_uav_id(" << val <<
+ ")_stride(4)_length(4)_access(read_write)\n";
return false;
}
-
static bool
-printfPrint(std::pair<const std::string, PrintfInfo *> &data, OSTREAM_TYPE &O)
-{
+printfPrint(std::pair<const std::string,
+ PrintfInfo *> &data, OSTREAM_TYPE &O) {
O << ";printf_fmt:" << data.second->getPrintfID();
// Number of operands
O << ":" << data.second->getNumOperands();
@@ -166,13 +163,10 @@
O << ";\n"; // c_str() is cheap way to trim
return false;
}
-
-
void AMDILKernelManager::updatePtrArg(Function::const_arg_iterator Ip,
int numWriteImages, int raw_uav_buffer,
int counter, bool isKernel,
- const Function *F)
-{
+ const Function *F) {
assert(F && "Cannot pass a NULL Pointer to F!");
assert(Ip->getType()->isPointerTy() &&
"Argument must be a pointer to be passed into this function!\n");
@@ -187,7 +181,8 @@
if ((Align & (Align - 1))) Align = NextPowerOf2(Align);
}
ptrArg += Ip->getName().str() + ":" + getTypeName(PT, symTab, mMFI,
- mMFI->isSignedIntType(Ip)) + ":1:1:" +
+ mMFI->isSignedIntType(Ip))
+ + ":1:1:" +
itostr(counter * 16) + ":";
if (mSTM->overridesFlatAS()) {
MemType = "flat";
@@ -215,7 +210,8 @@
mMFI->uav_insert(ptrID);
break;
case AMDILAS::CONSTANT_ADDRESS: {
- if (isKernel && mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ if (isKernel &&
+ mSTM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
const AMDILKernel* t = mAMI->getKernel(F->getName());
if (mAMI->usesHWConstant(t, Ip->getName())) {
MemType = /*(isSI) ? "uc\0" :*/ "hc\0";
@@ -255,7 +251,9 @@
case AMDILAS::LOCAL_ADDRESS:
if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
MemType = "hl\0";
- ptrID = 1;
+ // sizes of local mem pointed to by ptr-type args are unknown,
+ // so go to the default lds buffer
+ ptrID = DEFAULT_LDS_ID;
mMFI->setUsesLDS();
} else {
MemType = "l\0";
@@ -280,7 +278,6 @@
ptrArg += (mMFI->isRestrictPointer(Ip)) ? ":1" : ":0";
mMFI->addMetadata(ptrArg, true);
}
-
AMDILKernelManager::AMDILKernelManager(AMDILTargetMachine *TM)
{
mTM = TM;
@@ -290,12 +287,9 @@
mMF = NULL;
clear();
}
-
-AMDILKernelManager::~AMDILKernelManager()
-{
+AMDILKernelManager::~AMDILKernelManager() {
clear();
}
-
void
AMDILKernelManager::setMF(MachineFunction *MF)
{
@@ -303,24 +297,19 @@
mMFI = MF->getInfo<AMDILMachineFunctionInfo>();
mAMI = &(MF->getMMI().getObjFileInfo<AMDILModuleInfo>());
}
-
-void AMDILKernelManager::clear()
-{
+void AMDILKernelManager::clear() {
mUniqueID = 0;
mWasKernel = false;
mHasImageWrite = false;
mHasOutputInst = false;
}
-
-bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI)
-{
+bool AMDILKernelManager::useCompilerWrite(const MachineInstr *MI) {
return (MI->getOpcode() == AMDIL::RETURN && wasKernel() && !mHasImageWrite
&& !mHasOutputInst);
}
-
void AMDILKernelManager::processArgMetadata(OSTREAM_TYPE &O,
- uint32_t buf,
- bool isKernel)
+ uint32_t buf,
+ bool isKernel)
{
const Function *F = mMF->getFunction();
const char * symTab = "NoSymTab";
@@ -329,7 +318,6 @@
if (F->hasStructRetAttr()) {
assert(Ip != Ep && "Invalid struct return fucntion!");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INTERNAL_ERROR]);
++Ip;
}
uint32_t mCBSize = 0;
@@ -352,8 +340,12 @@
Type *cType = Ip->getType();
if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) {
std::string argMeta(";value:");
- argMeta += Ip->getName().str() + ":" + getTypeName(cType, symTab, mMFI
- , mMFI->isSignedIntType(Ip)) + ":";
+ argMeta += Ip->getName().str() + ":" + getTypeName(cType,
+ symTab,
+ mMFI
+ ,
+ mMFI->isSignedIntType(
+ Ip)) + ":";
int bitsize = cType->getPrimitiveSizeInBits();
int numEle = 1;
if (cType->getTypeID() == Type::VectorTyID) {
@@ -391,24 +383,24 @@
if (mSTM->device()->isSupported(AMDILDeviceInfo::Images)) {
std::string imageArg(";image:");
imageArg += Ip->getName().str() + ":";
- if (i1d) imageArg += "1D:";
+ if (i1d) imageArg += "1D:";
else if (i1da) imageArg += "1DA:";
else if (i1db) imageArg += "1DB:";
- else if (i2d) imageArg += "2D:";
+ else if (i2d) imageArg += "2D:";
else if (i2da) imageArg += "2DA:";
- else if (i3d) imageArg += "3D:";
+ else if (i3d) imageArg += "3D:";
if (isKernel) {
if (mAMI->isReadOnlyImage (mMF->getFunction()->getName(),
(ROArg + WOArg))) {
imageArg += "RO:" + itostr(ROArg);
O << "dcl_resource_id(" << ROArg << ")_type(";
- if (i1d) O << "1d";
+ if (i1d) O << "1d";
else if (i1da) O << "1darray";
else if (i1db) O << "buffer";
- else if (i2d) O << "2d";
+ else if (i2d) O << "2d";
else if (i2da) O << "2darray";
- else if (i3d) O << "3d";
+ else if (i3d) O << "3d";
O << ")_fmtx(unknown)_fmty(unknown)"
<< "_fmtz(unknown)_fmtw(unknown)\n";
++ROArg;
@@ -418,12 +410,12 @@
offset += WOArg;
imageArg += "WO:" + itostr(offset & 0x7);
O << "dcl_uav_id(" << ((offset) & 0x7) << ")_type(";
- if (i1d) O << "1d";
+ if (i1d) O << "1d";
else if (i1da) O << "1darray";
else if (i1db) O << "buffer";
- else if (i2d) O << "2d";
+ else if (i2d) O << "2d";
else if (i2da) O << "2darray";
- else if (i3d) O << "3d";
+ else if (i3d) O << "3d";
O << ")_fmtx(uint)\n";
++WOArg;
} else {
@@ -455,8 +447,9 @@
F);
++mCBSize;
}
- } else if (CT->getTypeID() == Type::StructTyID
- && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
+ }
+ else if (CT->getTypeID() == Type::StructTyID
+ && PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) {
const TargetData *td = mTM->getTargetData();
const StructLayout *sl = td->getStructLayout(dyn_cast<StructType>(CT));
int bytesize = sl->getSizeInBytes();
@@ -494,11 +487,9 @@
++Ip;
}
}
-
void AMDILKernelManager::printHeader(AMDILAsmPrinter *AsmPrinter,
OSTREAM_TYPE &O,
- const std::string &name)
-{
+ const std::string &name) {
mName = name;
std::string kernelName;
kernelName = (mSTM->isApple()) ? "__OpenCL_" + name + "_kernel"
@@ -512,9 +503,7 @@
}
O << "mov " << AsmPrinter->getRegisterName(AMDIL::SP) << ", l1.0000\n";
}
-
-void AMDILKernelManager::printGroupSize(OSTREAM_TYPE& O)
-{
+void AMDILKernelManager::printGroupSize(OSTREAM_TYPE& O) {
// The HD4XXX generation of hardware does not support a 3D launch, so we need
// to use dcl_num_thread_per_group to specify the launch size. If the launch
// size is specified via a kernel attribute, we print it here. Otherwise we
@@ -531,14 +520,14 @@
O << "dcl_num_thread_per_group "
<< kernel->sgv->reqGroupSize[0] << ", "
<< kernel->sgv->reqGroupSize[1] << ", "
- << kernel->sgv->reqGroupSize[2] << " \n";
+ << kernel->sgv->reqGroupSize[2] << "\n";
} else {
// If the kernel uses local memory, then the kernel is being
// compiled in single wavefront mode. So we have to generate code slightly
// different.
O << "dcl_num_thread_per_group "
<< mSTM->device()->getWavefrontSize()
- << ", 1, 1 \n";
+ << ", 1, 1\n";
}
} else {
// Otherwise we generate for devices that support 3D launch natively. If
@@ -549,65 +538,95 @@
O << "dcl_num_thread_per_group "
<< kernel->sgv->reqGroupSize[0] << ", "
<< kernel->sgv->reqGroupSize[1] << ", "
- << kernel->sgv->reqGroupSize[2] << " \n";
+ << kernel->sgv->reqGroupSize[2] << "\n";
} else {
// Otherwise we specify the largest workgroup size that can be launched.
O << "dcl_max_thread_per_group " <<
- kernel->sgv->reqGroupSize[0]
- * kernel->sgv->reqGroupSize[1]
- * kernel->sgv->reqGroupSize[2] << " \n";
+ kernel->sgv->reqGroupSize[0]
+ * kernel->sgv->reqGroupSize[1]
+ * kernel->sgv->reqGroupSize[2] << "\n";
+ }
+
+ if (kernel->sgv->mHasRWR) {
+ O << "dcl_gws_thread_count " <<
+ kernel->sgv->reqRegionSize[0]
+ * kernel->sgv->reqRegionSize[1]
+ * kernel->sgv->reqRegionSize[2] << "\n";
}
} else {
- O << "dcl_max_thread_per_group " << mSTM->device()->getWavefrontSize() << "\n";
+ O << "dcl_max_thread_per_group " << mSTM->device()->getWavefrontSize() <<
+ "\n";
}
}
// Now that we have specified the workgroup size, lets declare the local
// memory size. If we are using hardware and we know the value at compile
// time, then we need to declare the correct value. Otherwise we should just
// declare the maximum size.
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem)
+ && mMFI->usesLDS()) {
size_t kernelLocalSize = (kernel->curHWSize + 3) & ~3;
if (kernelLocalSize > mSTM->device()->getMaxLDSSize()) {
mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_LOCAL_RESOURCES]);
}
+ // declare non-default local buffers
+ unsigned nLocals = mAMI->numLocalBuffers();
+ std::vector<unsigned> localBufferSizes(nLocals, 0);
+ AMDILLocalArg* locals = kernel->lvgv;
+ llvm::SmallVector<AMDILArrayMem *, DEFAULT_VEC_SLOTS>::iterator ib, ie;
+ for (ib = locals->local.begin(), ie = locals->local.end(); ib != ie;
+ ++ib) {
+ AMDILArrayMem* local = *ib;
+ if (!local->isHW || local->isRegion) {
+ continue;
+ }
+ assert(local->resourceID != 0 && "bad resourceID");
+ uint32_t size = (local->vecSize + 3) & ~3;
+ localBufferSizes[local->resourceID-DEFAULT_LDS_ID] += size;
+ }
+ unsigned nDefSize = 0;
+ for (unsigned i = 1; i < nLocals; ++i) {
+ unsigned size = localBufferSizes[i];
+ if (size > 0) {
+ O << "dcl_lds_id(" << DEFAULT_LDS_ID + i << ") " << size << "\n";
+ nDefSize += size;
+ }
+ }
// If there is a local pointer as a kernel argument, we don't know the size
// at compile time, so we reserve all of the space.
- if (mMFI->usesLDS() && (mMFI->hasLDSArg() || !kernelLocalSize)) {
- O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
- << mSTM->device()->getMaxLDSSize() << "\n";
- mMFI->setUsesMem(AMDILDevice::LDS_ID);
- } else if (kernelLocalSize) {
- // We know the size, so lets declare it correctly.
- O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") "
- << kernelLocalSize << "\n";
- mMFI->setUsesMem(AMDILDevice::LDS_ID);
+ unsigned defLocalSize = localBufferSizes[0];
+ if (mMFI->hasLDSArg() || !kernelLocalSize) {
+ defLocalSize = mSTM->device()->getMaxLDSSize() - nDefSize;
+ }
+ // declare the default local buffer
+ if (defLocalSize > 0) {
+ O << "dcl_lds_id(" << DEFAULT_LDS_ID << ") " << defLocalSize << "\n";
}
+ mMFI->setUsesMem(AMDILDevice::LDS_ID);
}
// If the device supports the region memory extension, which maps to our
// hardware GDS memory, then lets declare it so we can use it later on.
if (mSTM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
size_t kernelGDSSize = (kernel->curHWRSize + 3) & ~3;
if (kernelGDSSize > mSTM->device()->getMaxGDSSize()) {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]);
+ mMFI->addErrorMsg(
+ amd::CompilerErrorMessage[INSUFFICIENT_REGION_RESOURCES]);
}
// If there is a region pointer as a kernel argument, we don't know the size
// at compile time, so we reserved all of the space.
if (mMFI->usesGDS() && (mMFI->hasGDSArg() || !kernelGDSSize)) {
O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
- ") " << mSTM->device()->getMaxGDSSize() << "\n";
+ ") " << mSTM->device()->getMaxGDSSize() << "\n";
mMFI->setUsesMem(AMDILDevice::GDS_ID);
} else if (kernelGDSSize) {
// We know the size, so lets declare it.
O << "dcl_gds_id(" << DEFAULT_GDS_ID <<
- ") " << kernelGDSSize << "\n";
+ ") " << kernelGDSSize << "\n";
mMFI->setUsesMem(AMDILDevice::GDS_ID);
}
}
}
-
void
-AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O)
-{
+AMDILKernelManager::printDecls(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O) {
// If we are a HD4XXX generation device, then we only support a single uav
// surface, so we declare it and leave
if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
@@ -636,7 +655,8 @@
binaryForEach(mMFI->uav_begin(), mMFI->uav_end(), arenaPrint, O);
}
- if (mMFI->sema_size() && !mSTM->device()->usesHardware(AMDILDeviceInfo::Semaphore)) {
+ if (mMFI->sema_size() &&
+ !mSTM->device()->usesHardware(AMDILDeviceInfo::Semaphore)) {
mMFI->addErrorMsg(amd::CompilerErrorMessage[NO_SEMAPHORE_SUPPORT]);
} else {
binaryForEach(mMFI->sema_begin(), mMFI->sema_end(), semaPrint, O);
@@ -677,9 +697,8 @@
}
getIntrinsicSetup(AsmPrinter, O);
}
-
void AMDILKernelManager::getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter,
- OSTREAM_TYPE &O)
+ OSTREAM_TYPE &O)
{
O << "mov r0.__z_, vThreadGrpIdFlat0.x\n"
<< "mov r1022.xyz0, vTidInGrp0.xyz\n";
@@ -687,12 +706,12 @@
O << "mov r1023.xyz0, vThreadGrpId0.xyz\n";
} else {
O << "imul r0.___w, cb0[2].x, cb0[2].y\n"
- // Calculates the local id.
- // Calculates the group id.
- << "umod r1023.x___, r0.z, cb0[2].x\n"
- << "udiv r1023._y__, r0.z, cb0[2].x\n"
- << "umod r1023._y__, r1023.y, cb0[2].y\n"
- << "udiv r1023.__z_, r0.z, r0.w\n";
+ // Calculates the local id.
+ // Calculates the group id.
+ << "umod r1023.x___, r0.z, cb0[2].x\n"
+ << "udiv r1023._y__, r0.z, cb0[2].x\n"
+ << "umod r1023._y__, r1023.y, cb0[2].y\n"
+ << "udiv r1023.__z_, r0.z, r0.w\n";
}
// Calculates the global id.
const AMDILKernel *kernel = mAMI->getKernel(mName);
@@ -745,7 +764,6 @@
<< "i64add " << AsmPrinter->getRegisterName(AMDIL::T2)
<< ".xy__, " << AsmPrinter->getRegisterName(AMDIL::T2)
<< ".xyyy, cb0[4].xyyy\n";
-
} else {
O << "imad " << AsmPrinter->getRegisterName(AMDIL::T2)
<< ".x___, r1023.w, cb0[4].y, cb0[4].x\n";
@@ -760,7 +778,6 @@
<< "i64add " << AsmPrinter->getRegisterName(AMDIL::T1)
<< ".xy__, " << AsmPrinter->getRegisterName(AMDIL::T1)
<< ".xyyy, cb0[3].xyyy\n";
-
} else {
O << "imad " << AsmPrinter->getRegisterName(AMDIL::T1)
<< ".x___, vAbsTidFlat.x, cb0[3].y, cb0[3].x\n";
@@ -781,10 +798,11 @@
O << "imad r1025.xyz0, r1023.xyzz, cb0[10].xyzz, r1022.xyzz\n";
}
}
+ if (!mMFI->printf_empty()) {
+ O << "mov " << AsmPrinter->getRegisterName(AMDIL::PRINTF) << ".x, l0.y\n";
+ }
}
-
-void AMDILKernelManager::printFooter(OSTREAM_TYPE &O)
-{
+void AMDILKernelManager::printFooter(OSTREAM_TYPE &O) {
O << "ret\n";
if (mSTM->isApple()) {
O << "endfunc ; __OpenCL_" << mName << "_kernel\n";
@@ -792,13 +810,12 @@
O << "endfunc ; " << mName << "\n";
}
}
-
void
-AMDILKernelManager::printMetaData(OSTREAM_TYPE &O, uint32_t id, bool kernel)
-{
+AMDILKernelManager::printMetaData(OSTREAM_TYPE &O, uint32_t id, bool kernel) {
if (kernel) {
int kernelId = (mSTM->isApple())
- ? mAMI->getOrCreateFunctionID("__OpenCL_" + mName + "_kernel")
+ ? mAMI->getOrCreateFunctionID(
+ "__OpenCL_" + mName + "_kernel")
: mAMI->getOrCreateFunctionID(mName);
mMFI->addCalledFunc(id);
mUniqueID = kernelId;
@@ -811,40 +828,28 @@
mUniqueID = id;
}
}
-
-void AMDILKernelManager::setKernel(bool kernel)
-{
+void AMDILKernelManager::setKernel(bool kernel) {
mIsKernel = kernel;
if (kernel) {
mWasKernel = mIsKernel;
}
}
-
void AMDILKernelManager::setID(uint32_t id)
{
mUniqueID = id;
}
-
-void AMDILKernelManager::setName(const std::string &name)
-{
+void AMDILKernelManager::setName(const std::string &name) {
mName = name;
}
-
-bool AMDILKernelManager::wasKernel()
-{
+bool AMDILKernelManager::wasKernel() {
return mWasKernel;
}
-
-void AMDILKernelManager::setImageWrite()
-{
+void AMDILKernelManager::setImageWrite() {
mHasImageWrite = true;
}
-
-void AMDILKernelManager::setOutputInst()
-{
+void AMDILKernelManager::setOutputInst() {
mHasOutputInst = true;
}
-
void AMDILKernelManager::printConstantToRegMapping(
AMDILAsmPrinter *RegNames,
unsigned &LII,
@@ -872,7 +877,7 @@
O << "mov ";
if (isImage) {
printRegName(RegNames, mMFI->getArgReg(LII), O, true);
- O << " l" << mMFI->getIntLits(Counter++) << "\n";
+ O << " l" << mMFI->getLitIdx(Counter++) << "\n";
} else {
printRegName(RegNames, mMFI->getArgReg(LII), O, true);
O << " cb" <<Buffer<< "[" <<Counter++<< "]"
@@ -923,7 +928,7 @@
break;
};
if (lit) {
- O << "ishl " ;
+ O << "ishl ";
printRegName(RegNames, mMFI->getArgReg(LII), O, true);
O << " ";
printRegName(RegNames, mMFI->getArgReg(LII), O, false, true);
@@ -939,20 +944,19 @@
}
}
}
-
void
AMDILKernelManager::printCopyStructPrivate(const StructType *ST,
- OSTREAM_TYPE &O,
- size_t stackSize,
- uint32_t Buffer,
- uint32_t mLitIdx,
- uint32_t &Counter)
+ OSTREAM_TYPE &O,
+ size_t stackSize,
+ uint32_t Buffer,
+ uint32_t mLitIdx,
+ uint32_t &Counter)
{
size_t n = ((stackSize + 15) & ~15) >> 4;
for (size_t x = 0; x < n; ++x) {
if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateUAV)) {
O << "uav_raw_store_id(" <<
- mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
+ mSTM->device()->getResourceID(AMDILDevice::SCRATCH_ID)
<< ") mem0, r0.x, cb" << Buffer << "[" << Counter++ << "]\n";
} else if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
O << "ishr r0.y, r0.x, l0.x\n";
@@ -960,23 +964,21 @@
<<"[r0.y], cb" << Buffer << "[" << Counter++ << "]\n";
} else {
O << "uav_raw_store_id(" <<
- mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
+ mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID)
<< ") mem0, r0.x, cb" << Buffer << "[" << Counter++ << "]\n";
}
O << "iadd r0.x, r0.x, l" << mLitIdx << ".z\n";
}
}
-
-void AMDILKernelManager::printKernelArgs(OSTREAM_TYPE &O)
-{
+void AMDILKernelManager::printKernelArgs(OSTREAM_TYPE &O) {
std::string version(";version:");
version += itostr(mSTM->supportMetadata30() ? AMDIL_MAJOR_VERSION : 2) + ":"
+ itostr(AMDIL_MINOR_VERSION) + ":"
+ itostr(mSTM->supportMetadata30()
? AMDIL_REVISION_NUMBER : AMDIL_20_REVISION_NUMBER);
const AMDILKernel *kernel = mAMI->getKernel(
- (mSTM->isApple() && !mIsKernel)
- ? "__OpenCL_" + mName + "_kernel" : mName);
+ (mSTM->isApple() && !mIsKernel)
+ ? "__OpenCL_" + mName + "_kernel" : mName);
bool isKernel = (kernel) ? kernel->mKernel : false;
if (mSTM->isApple()) {
if (isKernel) {
@@ -1001,11 +1003,13 @@
size_t local = kernel->curSize;
size_t hwlocal = ((kernel->curHWSize + 3) & (~0x3));
bool usehwlocal = mSTM->device()->usesHardware(AMDILDeviceInfo::LocalMem);
- bool usehwprivate = mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem);
- bool useuavprivate = mSTM->device()->isSupported(AMDILDeviceInfo::PrivateUAV);
+ bool usehwprivate = mSTM->device()->usesHardware(
+ AMDILDeviceInfo::PrivateMem);
+ bool useuavprivate = mSTM->device()->isSupported(
+ AMDILDeviceInfo::PrivateUAV);
if (isKernel) {
O << ";memory:" << ((usehwprivate) ?
- (useuavprivate) ? "uav" : "hw" : "" ) << "private:"
+ (useuavprivate) ? "uav" : "hw" : "" ) << "private:"
<<(((mMFI->getStackSize() + 15) & (~0xF)))<< "\n";
}
O << ";memory:" << ((usehwlocal) ? "hw" : "") << "local:"
@@ -1061,7 +1065,7 @@
} else {
for (StringMap<SamplerInfo>::iterator
smb = mMFI->sampler_begin(),
- sme = mMFI->sampler_end(); smb != sme; ++ smb) {
+ sme = mMFI->sampler_end(); smb != sme; ++smb) {
O << ";sampler:" << (*smb).second.name << ":" << (*smb).second.idx
<< ":" << ((*smb).second.val == (uint32_t)-1 ? 0 : 1)
<< ":" << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val : 0)
@@ -1122,18 +1126,19 @@
->getGlobalVariable(argKernel);
if (GV && GV->hasInitializer()) {
const ConstantArray *nameArray
- = dyn_cast_or_null<ConstantArray>(GV->getInitializer());
+ = dyn_cast_or_null<ConstantArray>(GV->getInitializer());
if (nameArray) {
for (unsigned x = 0, y = nameArray->getNumOperands(); x < y; ++x) {
const GlobalVariable *gV= dyn_cast_or_null<GlobalVariable>(
- nameArray->getOperand(x)->getOperand(0));
+ nameArray->getOperand(x)->getOperand(0));
const ConstantDataArray *argName =
dyn_cast_or_null<ConstantDataArray>(gV->getInitializer());
if (!argName) {
continue;
}
std::string argStr = argName->getAsString();
- O << ";reflection:" << x << ":" << argStr.substr(0, argStr.length()-1) << "\n";
+ O << ";reflection:" << x << ":" <<
+ argStr.substr(0, argStr.length()-1) << "\n";
}
}
}
@@ -1148,7 +1153,6 @@
O << ";ARGEND:" << mName << "\n";
}
}
-
void AMDILKernelManager::printArgCopies(OSTREAM_TYPE &O,
AMDILAsmPrinter *RegNames)
{
@@ -1166,14 +1170,17 @@
uint32_t stackSize = mMFI->getStackSize();
uint32_t privateSize = mMFI->getScratchSize();
uint32_t stackOffset = (privateSize + 15) & (~0xF);
- if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) && !mSTM->overridesFlatAS()) {
+ if (mSTM->device()->usesHardware(AMDILDeviceInfo::PrivateMem) &&
+ !mSTM->overridesFlatAS()) {
// TODO: If the size is too large, we need to fall back to software emulated
// instead of using the hardware capability.
- int size = (((((stackSize != privateSize) ? stackSize + privateSize : stackSize)
- + 15) & (~0xF)) >> 4)
- + (mSTM->device()->isSupported(AMDILDeviceInfo::Debug) ? 1 : 0);
+ int size =
+ (((((stackSize != privateSize) ? stackSize + privateSize : stackSize)
+ + 15) & (~0xF)) >> 4)
+ + (mSTM->device()->isSupported(AMDILDeviceInfo::Debug) ? 1 : 0);
if (size > 4096) {
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_PRIVATE_RESOURCES]);
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[
+ INSUFFICIENT_PRIVATE_RESOURCES]);
}
if (size) {
// For any stack variables, we need to declare the literals for them so that
@@ -1204,8 +1211,10 @@
}
}
mMFI->addReservedLiterals(1);
- O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize << ", "
- << privateSize << ", 16, " << ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
+ O << "dcl_literal l" << mMFI->getNumLiterals() << ", " << stackSize <<
+ ", "
+ << privateSize << ", 16, " <<
+ ((stackSize == privateSize) ? 0 : stackOffset) << "\n"
<< "iadd r0.x, " << RegNames->getRegisterName(AMDIL::T1) << ".x, l"
<< mMFI->getNumLiterals() << ".w\n";
@@ -1214,6 +1223,7 @@
}
}
I = mMF->getFunction()->arg_begin();
+ int32_t count = 0;
unsigned curReg = 0;
for (I = mMF->getFunction()->arg_begin(); I != Ie; ++I) {
Type *curType = I->getType();
@@ -1229,7 +1239,13 @@
"l3.y" );
break;
case 8:
- printConstantToRegMapping(RegNames, curReg, O, Counter, Buffer, 1, "l3.x" );
+ printConstantToRegMapping(RegNames,
+ curReg,
+ O,
+ Counter,
+ Buffer,
+ 1,
+ "l3.x" );
break;
}
} else if (const VectorType *VT = dyn_cast<VectorType>(curType)) {
@@ -1258,7 +1274,8 @@
(numEle) >> 1);
}
break;
- case 16: {
+ case 16:
+ {
switch (numEle) {
default:
printConstantToRegMapping(RegNames, curReg, O, Counter,
@@ -1271,7 +1288,8 @@
}
break;
}
- case 8: {
+ case 8:
+ {
switch (numEle) {
default:
printConstantToRegMapping(RegNames, curReg, O, Counter,
@@ -1329,13 +1347,17 @@
} else {
const TargetData* TD = mTM->getTargetData();
size_t structSize
- = TD->RoundUpAlignment(TD->getTypeAllocSize(ST), 16);
+ = TD->RoundUpAlignment(TD->getTypeAllocSize(ST), 16);
stackOffset += structSize;
O << "mov ";
printRegName(RegNames, mMFI->getArgReg(curReg), O, true);
O << " r0.x\n";
- printCopyStructPrivate(ST, O, structSize, Buffer, mMFI->getNumLiterals(),
+ printCopyStructPrivate(ST,
+ O,
+ structSize,
+ Buffer,
+ mMFI->getNumLiterals(),
Counter);
++curReg;
}
@@ -1388,33 +1410,15 @@
// constant pointers to the software emulated section.
if (constNum > mSTM->device()->getMaxNumCBs()) {
assert(0 && "Max constant buffer limit passed!");
- mMFI->addErrorMsg(amd::CompilerErrorMessage[INSUFFICIENT_CONSTANT_RESOURCES]);
+ mMFI->addErrorMsg(amd::CompilerErrorMessage[
+ INSUFFICIENT_CONSTANT_RESOURCES]);
}
}
}
-
-void AMDILKernelManager::emitLiterals(OSTREAM_TYPE &O)
-{
+void AMDILKernelManager::emitLiterals(OSTREAM_TYPE &O) {
char buffer[256];
- std::map<uint32_t, uint32_t>::iterator ilb, ile;
- for (ilb = mMFI->begin_32(), ile = mMFI->end_32(); ilb != ile; ++ilb) {
- uint32_t a = ilb->first;
- O << "dcl_literal l" <<ilb->second<< ", ";
- sprintf(buffer, "0x%08X, 0x%08X, 0x%08X, 0x%08X", a, a, a, a);
- O << buffer << "; f32:i32 " << ilb->first << "\n";
- }
- std::map<uint64_t, uint32_t>::iterator llb, lle;
- for (llb = mMFI->begin_64(), lle = mMFI->end_64(); llb != lle; ++llb) {
- uint32_t v[2];
- uint64_t a = llb->first;
- memcpy(v, &a, sizeof(uint64_t));
- O << "dcl_literal l" <<llb->second<< ", ";
- sprintf(buffer, "0x%08X, 0x%08X, 0x%08X, 0x%08X; f64:i64 ",
- v[0], v[1], v[0], v[1]);
- O << buffer << llb->first << "\n";
- }
std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator vlb, vle;
- for (vlb = mMFI->begin_128(), vle = mMFI->end_128(); vlb != vle; ++vlb) {
+ for (vlb = mMFI->lit_begin(), vle = mMFI->lit_end(); vlb != vle; ++vlb) {
uint32_t v[2][2];
uint64_t a = vlb->first.first;
uint64_t b = vlb->first.second;
@@ -1426,18 +1430,14 @@
O << buffer << vlb->first.first << vlb->first.second << "\n";
}
}
-
// If the value is not known, then the uav is set, otherwise the mValueIDMap
// is used.
-void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID)
-{
+void AMDILKernelManager::setUAVID(const Value *value, uint32_t ID) {
if (value) {
mValueIDMap[value] = ID;
}
}
-
-uint32_t AMDILKernelManager::getUAVID(const Value *value)
-{
+uint32_t AMDILKernelManager::getUAVID(const Value *value) {
if (mValueIDMap.find(value) != mValueIDMap.end()) {
return mValueIDMap[value];
}
@@ -1448,4 +1448,3 @@
return mSTM->device()->getResourceID(AMDILDevice::RAW_UAV_ID);
}
}
-
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILKernelManager.h Wed Sep 12 12:43:34 2012
@@ -29,8 +29,7 @@
#define IMAGETYPE_3D 1
#define RESERVED_LIT_COUNT 6
-namespace llvm
-{
+namespace llvm {
class AMDILSubtarget;
class AMDILMachineFunctionInfo;
class AMDILModuleInfo;
@@ -44,9 +43,7 @@
class ConstantFP;
class PrintfInfo;
-
-class AMDILKernelManager
-{
+class AMDILKernelManager {
public:
typedef enum {
RELEASE_ONLY,
@@ -65,7 +62,6 @@
void processArgMetadata(OSTREAM_TYPE &O,
uint32_t buf, bool kernel);
-
/// Prints the header for the kernel which includes the groupsize declaration
/// and calculation of the local/group/global id's.
void printHeader(AMDILAsmPrinter *AsmPrinter, OSTREAM_TYPE &O,
@@ -104,6 +100,9 @@
// Returns whether a compiler needs to insert a write to memory or not.
bool useCompilerWrite(const MachineInstr *MI);
+ // Return whether a region_barrier is used
+ bool useRegionBarrier(const MachineInstr *MI);
+
// Set the flag that there exists an image write.
void setImageWrite();
void setOutputInst();
@@ -155,6 +154,7 @@
/// compiler specific write if no other writes to memory occured.
bool mHasImageWrite;
bool mHasOutputInst;
+ bool mHasRegionBarrier;
/// Map from const Value * to UAV ID.
std::map<const Value *, uint32_t> mValueIDMap;
@@ -166,6 +166,5 @@
AMDILMachineFunctionInfo *mMFI;
AMDILModuleInfo *mAMI;
}; // class AMDILKernelManager
-
} // llvm namespace
#endif // _AMDILKERNELMANAGER_H_
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILLiteralManager.cpp Wed Sep 12 12:43:34 2012
@@ -18,6 +18,8 @@
#include "AMDILMachineFunctionInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILTargetMachine.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Debug.h"
@@ -25,7 +27,6 @@
using namespace llvm;
-
// AMDIL Literal Manager traverses through all of the LOADCONST instructions and
// converts them from an immediate value to the literal index. The literal index
// is valid IL, but the immediate values are not. The Immediate values must be
@@ -33,10 +34,8 @@
// are used. It is also illegal to declare the same literal twice, so this keeps
// that from occuring.
-namespace
-{
-class AMDILLiteralManager : public MachineFunctionPass
-{
+namespace {
+class AMDILLiteralManager : public MachineFunctionPass {
public:
static char ID;
AMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL);
@@ -55,94 +54,99 @@
char AMDILLiteralManager::ID = 0;
}
-namespace llvm
-{
+namespace llvm {
FunctionPass *
-createAMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL)
-{
+createAMDILLiteralManager(TargetMachine &tm, CodeGenOpt::Level OL) {
return new AMDILLiteralManager(tm, OL);
}
-
}
AMDILLiteralManager::AMDILLiteralManager(TargetMachine &tm,
- CodeGenOpt::Level OL)
+ CodeGenOpt::Level OL)
: MachineFunctionPass(ID),
- TM(tm)
-{
+ TM(tm) {
}
-
-bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF)
-{
+bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF) {
mChanged = false;
+ DEBUG(MF.dump());
mMFI = MF.getInfo<AMDILMachineFunctionInfo>();
const AMDILTargetMachine *amdtm =
reinterpret_cast<const AMDILTargetMachine *>(&TM);
mSTM = dynamic_cast<const AMDILSubtarget *>(amdtm->getSubtargetImpl());
mKM = const_cast<AMDILKernelManager *>(mSTM->getKernelManager());
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
- std::bind1st(std::mem_fun(&AMDILLiteralManager::trackLiterals), this));
+ std::bind1st(std::mem_fun(&AMDILLiteralManager::
+ trackLiterals), this));
+ DEBUG(MF.dump());
return mChanged;
}
-
-bool AMDILLiteralManager::trackLiterals(MachineBasicBlock::iterator *bbb)
-{
+bool AMDILLiteralManager::trackLiterals(MachineBasicBlock::iterator *bbb) {
MachineInstr *MI = *bbb;
uint32_t Opcode = MI->getOpcode();
- switch(Opcode) {
- default:
- return false;
- case AMDIL::VCREATE_v2i8:
- case AMDIL::VCREATE_v2i16:
- case AMDIL::VCREATE_v2i32:
- case AMDIL::VCREATE_v2i64:
- case AMDIL::VCREATE_v2f32:
- case AMDIL::VCREATE_v2f64:
- case AMDIL::VCREATE_v4i8:
- case AMDIL::VCREATE_v4i16:
- case AMDIL::VCREATE_v4i32:
- case AMDIL::VCREATE_v4f32:
- case AMDIL::LOADCONST_i8:
- case AMDIL::LOADCONST_i16:
- case AMDIL::LOADCONST_i32:
- case AMDIL::LOADCONST_i64:
- case AMDIL::LOADCONST_f32:
- case AMDIL::LOADCONST_f64:
- break;
- };
- MachineOperand &dstOp = MI->getOperand(0);
- MachineOperand &litOp = MI->getOperand(1);
- if (!litOp.isImm() && !litOp.isFPImm()) {
- return false;
- }
- if (!dstOp.isReg()) {
- return false;
- }
- // Change the literal to the correct index for each literal that is found.
- if (litOp.isImm()) {
- int64_t immVal = litOp.getImm();
- uint32_t idx = MI->getOpcode() == AMDIL::LOADCONST_i64
- ? mMFI->addi64Literal(immVal)
- : mMFI->addi32Literal(static_cast<int>(immVal), Opcode);
+ for (unsigned x = 0, y = MI->getNumOperands(); x < y; ++x) {
+ MachineOperand &litOp = MI->getOperand(x);
+ if ((!litOp.isImm() && !litOp.isFPImm())
+ || isBypassedLiteral(MI, x)
+ || isSkippedLiteral(MI, x)
+ || !MI->getDesc().OpInfo) {
+ continue;
+ }
+ /*
+ assert(Opcode <= AMDIL::LOADCONSTf64 && Opcode >= AMDIL::LOADCONSTi8
+ && "Found a loadconst instruction!");
+ */
+ uint32_t idx;
+ if (litOp.isFPImm()) {
+ const ConstantFP *fpVal = litOp.getFPImm();
+ const APFloat &fp = fpVal->getValueAPF();
+ const fltSemantics &fpSem = fpVal->getValueAPF().getSemantics();
+ if (&fpSem == &APFloat::IEEEsingle) {
+ idx = mMFI->addf32Literal(fpVal);
+ } else if (&fpSem == &APFloat::IEEEdouble) {
+ idx = mMFI->addf64Literal(fpVal);
+ } else {
+ assert(!"Found a case we don't handle!");
+ }
+ } else if (litOp.isImm()) {
+ unsigned regClass = MI->getDesc().OpInfo[x].RegClass;
+ if (regClass == ~0U) {
+ regClass = getRegClassFromName(TM.getInstrInfo()->getName(Opcode));
+ }
+ int64_t immVal = litOp.getImm();
+ switch (regClass) {
+ default:
+ idx = Opcode == AMDIL::LOADCONSTi64
+ ? mMFI->addi64Literal(immVal)
+ : mMFI->addi32Literal(static_cast<int>(immVal));
+ break;
+ case AMDIL::GPRI8RegClassID:
+ case AMDIL::GPRV2I8RegClassID:
+ case AMDIL::GPRV4I8RegClassID:
+ idx = mMFI->addi32Literal(static_cast<int>(immVal), AMDIL::LOADCONSTi8);
+ break;
+ case AMDIL::GPRI16RegClassID:
+ case AMDIL::GPRV2I16RegClassID:
+ case AMDIL::GPRV4I16RegClassID:
+ idx = mMFI->addi32Literal(static_cast<int>(immVal), AMDIL::LOADCONSTi16);
+ break;
+ case AMDIL::GPRI32RegClassID:
+ case AMDIL::GPRV2I32RegClassID:
+ case AMDIL::GPRV4I32RegClassID:
+ idx = mMFI->addi32Literal(static_cast<int>(immVal));
+ break;
+ case AMDIL::GPRI64RegClassID:
+ case AMDIL::GPRV2I64RegClassID:
+ idx = mMFI->addi64Literal(immVal);
+ break;
+ }
+ } else {
+ assert(!"Should never hit here unless a new literal type was added!");
+ }
litOp.ChangeToImmediate(idx);
- return false;
- }
-
- if (litOp.isFPImm()) {
- const ConstantFP *fpVal = litOp.getFPImm();
- uint32_t idx = MI->getOpcode() == AMDIL::LOADCONST_f64
- ? mMFI->addf64Literal(fpVal)
- : mMFI->addf32Literal(fpVal);
- litOp.ChangeToImmediate(idx);
- return false;
}
return false;
}
-
-const char* AMDILLiteralManager::getPassName() const
-{
+const char* AMDILLiteralManager::getPassName() const {
return "AMDIL Literal Manager";
}
-
-
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.cpp Wed Sep 12 12:43:34 2012
@@ -17,6 +17,8 @@
#define NULL 0
#endif
+#include "llvm/Config/config.h"
+
using namespace llvm;
AMDILMCAsmInfo::AMDILMCAsmInfo(const Triple &Triple) : MCAsmInfo()
{
@@ -90,10 +92,8 @@
ExceptionsType = ExceptionHandling::None;
DwarfUsesInlineInfoSection = false;
DwarfSectionOffsetDirective = ".offset";
- //DwarfUsesLabelOffsetForRanges = true;
//===--- CBE Asm Translation Table -----------------------------------===//
- //AsmTransCBE = NULL;
}
const char*
AMDILMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
@@ -106,4 +106,3 @@
};
return NULL;
}
-
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCAsmInfo.h Wed Sep 12 12:43:34 2012
@@ -16,12 +16,10 @@
#include "llvm/MC/MCAsmInfo.h"
#include "AMDILLLVMPC.h"
-namespace llvm
-{
+namespace llvm {
class Triple;
-class AMDILMCAsmInfo : public MCAsmInfo
-{
+class AMDILMCAsmInfo : public MCAsmInfo {
public:
AMDILMCAsmInfo(const Triple &Triple);
const char*
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp Wed Sep 12 12:43:34 2012
@@ -20,19 +20,16 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
#if 0
-namespace
-{
-class AMDILMCCodeEmitter : public MCCodeEmitter
-{
- AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT
- void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- const TargetInstrInfo &TII;
- MCContext &Ctx;
- bool Is64BitMode;
+namespace {
+class AMDILMCCodeEmitter : public MCCodeEmitter {
+AMDILMCCodeEmitter(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
+void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT
+const TargetMachine &TM;
+const TargetInstrInfo &TII;
+MCContext &Ctx;
+bool Is64BitMode;
public:
AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit);
~AMDILMCCodeEmitter();
@@ -48,12 +45,10 @@
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
-
-}; // class AMDILMCCodeEmitter
+}; // class AMDILMCCodeEmitter
}; // anonymous namespace
-namespace llvm
-{
+namespace llvm {
MCCodeEmitter *createAMDILMCCodeEmitter(const Target &,
TargetMachine &TM, MCContext &Ctx)
{
@@ -67,17 +62,14 @@
{
Is64BitMode = is64Bit;
}
-
AMDILMCCodeEmitter::~AMDILMCCodeEmitter()
{
}
-
unsigned
AMDILMCCodeEmitter::getNumFixupKinds() const
{
return 0;
}
-
const MCFixupKindInfo &
AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const
{
@@ -88,10 +80,8 @@
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
return MCCodeEmitter::getFixupKindInfo(Kind);
-// return Infos[Kind - FirstTargetFixupKind];
-
+ // return Infos[Kind - FirstTargetFixupKind];
}
-
void
AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte,
raw_ostream &OS) const
@@ -110,9 +100,13 @@
}
}
void
-AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize,
- MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const
+AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp,
+ unsigned ImmSize,
+ MCFixupKind FixupKind,
+ unsigned &CurByte,
+ raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ int ImmOffset) const
{
// If this is a simple integer displacement that doesn't require a relocation
// emit it now.
@@ -132,7 +126,6 @@
// TODO: Why the 4 zeros?
EmitConstant(0, ImmSize, CurByte, OS);
}
-
void
AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const
@@ -151,7 +144,7 @@
unsigned char BaseOpcode = 0;
#ifndef NDEBUG
// FIXME: Verify.
- if (// !Desc.isVariadic() &&
+ if ( // !Desc.isVariadic() &&
CurOp != NumOps) {
errs() << "Cannot encode all operands of: ";
MI.dump();
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp Wed Sep 12 12:43:34 2012
@@ -35,12 +35,13 @@
#include <utility>
using namespace llvm;
-static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl, const std::string &arg)
-{
+static const AMDILConstPtr *getConstPtr(const AMDILKernel *krnl,
+ const std::string &arg) {
if (!krnl) {
return NULL;
}
- llvm::SmallVector<AMDILConstPtr, DEFAULT_VEC_SLOTS>::const_iterator begin, end;
+ llvm::SmallVector<AMDILConstPtr,
+ DEFAULT_VEC_SLOTS>::const_iterator begin, end;
for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end();
begin != end; ++begin) {
if (!strcmp(begin->name.data(),arg.c_str())) {
@@ -49,33 +50,22 @@
}
return NULL;
}
-
-void PrintfInfo::addOperand(size_t idx, uint32_t size)
-{
+void PrintfInfo::addOperand(size_t idx, uint32_t size) {
mOperands.resize((unsigned)(idx + 1));
mOperands[(unsigned)idx] = size;
}
-
-uint32_t PrintfInfo::getPrintfID()
-{
+uint32_t PrintfInfo::getPrintfID() {
return mPrintfID;
}
-
-void PrintfInfo::setPrintfID(uint32_t id)
-{
+void PrintfInfo::setPrintfID(uint32_t id) {
mPrintfID = id;
}
-
-size_t PrintfInfo::getNumOperands()
-{
+size_t PrintfInfo::getNumOperands() {
return mOperands.size();
}
-
-uint32_t PrintfInfo::getOperandID(uint32_t idx)
-{
+uint32_t PrintfInfo::getOperandID(uint32_t idx) {
return mOperands[idx];
}
-
AMDILMachineFunctionInfo::AMDILMachineFunctionInfo()
: CalleeSavedFrameSize(0), BytesToPopOnReturn(0),
DecorationStyle(None), ReturnAddrIndex(0),
@@ -93,7 +83,6 @@
mArgSize = -1;
mStackSize = -1;
}
-
AMDILMachineFunctionInfo::AMDILMachineFunctionInfo(MachineFunction& MF)
: CalleeSavedFrameSize(0), BytesToPopOnReturn(0),
DecorationStyle(None), ReturnAddrIndex(0),
@@ -124,7 +113,6 @@
mArgSize = -1;
mStackSize = -1;
}
-
AMDILMachineFunctionInfo::~AMDILMachineFunctionInfo()
{
for (std::map<std::string, PrintfInfo*>::iterator pfb = printf_begin(),
@@ -192,7 +180,6 @@
{
SRetReturnReg = reg;
}
-
bool
AMDILMachineFunctionInfo::usesHWConstant(std::string name) const
{
@@ -203,15 +190,13 @@
return false;
}
}
-
uint32_t
AMDILMachineFunctionInfo::getLocal(uint32_t dim)
{
if (mKernel && mKernel->sgv) {
AMDILKernelAttr *sgv = mKernel->sgv;
switch (dim) {
- default:
- break;
+ default: break;
case 0:
case 1:
case 2:
@@ -241,13 +226,11 @@
{
return mKernel != NULL && mKernel->mKernel;
}
-
AMDILKernel*
AMDILMachineFunctionInfo::getKernel()
{
return mKernel;
}
-
std::string
AMDILMachineFunctionInfo::getName()
{
@@ -257,13 +240,13 @@
return "";
}
}
-
uint32_t
AMDILMachineFunctionInfo::getArgSize()
{
if (mArgSize == -1) {
const AMDILTargetMachine *TM =
reinterpret_cast<const AMDILTargetMachine*>(&mMF->getTarget());
+ const TargetData* TD = TM->getTargetData();
Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
Function::const_arg_iterator Ie = mMF->getFunction()->arg_end();
uint32_t Counter = 0;
@@ -351,6 +334,7 @@
{
const AMDILTargetMachine *TM =
reinterpret_cast<const AMDILTargetMachine*>(&mMF->getTarget());
+ const TargetData* TD = TM->getTargetData();
if (mScratchSize == -1) {
mScratchSize = 0;
Function::const_arg_iterator I = mMF->getFunction()->arg_begin();
@@ -365,7 +349,6 @@
}
return (uint32_t)mScratchSize;
}
-
uint32_t
AMDILMachineFunctionInfo::getStackSize()
{
@@ -396,126 +379,89 @@
mStackSize = privSize;
}
return (uint32_t)mStackSize;
-
}
-
uint32_t
-AMDILMachineFunctionInfo::addi32Literal(uint32_t val, int Opcode)
-{
+AMDILMachineFunctionInfo::addi32Literal(uint32_t val, int Opcode) {
// Since we have emulated 16/8/1 bit register types with a 32bit real
// register, we need to sign extend the constants to 32bits in order for
// comparisons against the constants to work correctly, this fixes some issues
// we had in conformance failing for saturation.
- if (Opcode == AMDIL::LOADCONST_i16) {
+ if (Opcode == AMDIL::LOADCONSTi16) {
val = (((int32_t)val << 16) >> 16);
- } else if (Opcode == AMDIL::LOADCONST_i8) {
+ } else if (Opcode == AMDIL::LOADCONSTi8) {
val = (((int32_t)val << 24) >> 24);
}
- if (mIntLits.find(val) == mIntLits.end()) {
- mIntLits[val] = getNumLiterals();
- }
- return mIntLits[val];
+ uint64_t val64b = ((uint64_t)val | (uint64_t)val << 32U);
+ return addLiteral(val64b, val64b);
}
-
uint32_t
-AMDILMachineFunctionInfo::addi64Literal(uint64_t val)
-{
- if (mLongLits.find(val) == mLongLits.end()) {
- mLongLits[val] = getNumLiterals();
- }
- return mLongLits[val];
+AMDILMachineFunctionInfo::addi64Literal(uint64_t val) {
+ return addLiteral(val, val);
}
-
uint32_t
-AMDILMachineFunctionInfo::addi128Literal(uint64_t val_lo, uint64_t val_hi)
-{
+AMDILMachineFunctionInfo::addi128Literal(uint64_t val_lo, uint64_t val_hi) {
+ return addLiteral(val_lo, val_hi);
+}
+uint32_t
+AMDILMachineFunctionInfo::addLiteral(uint64_t val_lo, uint64_t val_hi) {
std::pair<uint64_t, uint64_t> a;
a.first = val_lo;
a.second = val_hi;
- if (mVecLits.find(a) == mVecLits.end()) {
- mVecLits[a] = getNumLiterals();
+ if (mLits.find(a) == mLits.end()) {
+ mLits[a] = getNumLiterals();
}
- return mVecLits[a];
+ return mLits[a];
}
-
uint32_t
-AMDILMachineFunctionInfo::addf32Literal(uint32_t val)
-{
- if (mIntLits.find(val) == mIntLits.end()) {
- mIntLits[val] = getNumLiterals();
- }
- return mIntLits[val];
+AMDILMachineFunctionInfo::addf32Literal(uint32_t val) {
+ uint64_t Val64b = ((uint64_t)val | ((uint64_t)val << 32));
+ return addLiteral(Val64b, Val64b);
}
-
uint32_t
-AMDILMachineFunctionInfo::addf32Literal(const ConstantFP *CFP)
-{
+AMDILMachineFunctionInfo::addf32Literal(const ConstantFP *CFP) {
uint32_t val = (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- if (mIntLits.find(val) == mIntLits.end()) {
- mIntLits[val] = getNumLiterals();
- }
- return mIntLits[val];
+ return addf32Literal(val);
}
-
uint32_t
-AMDILMachineFunctionInfo::addf64Literal(uint64_t val)
-{
- if (mLongLits.find(val) == mLongLits.end()) {
- mLongLits[val] = getNumLiterals();
- }
- return mLongLits[val];
+AMDILMachineFunctionInfo::addf64Literal(uint64_t val) {
+ return addLiteral(val, val);
}
-
uint32_t
-AMDILMachineFunctionInfo::addf64Literal(const ConstantFP *CFP)
-{
+AMDILMachineFunctionInfo::addf64Literal(const ConstantFP *CFP) {
union dtol_union {
double d;
uint64_t ul;
} dval;
const APFloat &APF = CFP->getValueAPF();
- if (&APF.getSemantics() == (const llvm::fltSemantics *)&APFloat::IEEEsingle) {
+ if (&APF.getSemantics() ==
+ (const llvm::fltSemantics *)&APFloat::IEEEsingle) {
float fval = APF.convertToFloat();
dval.d = (double)fval;
} else {
dval.d = APF.convertToDouble();
}
- if (mLongLits.find(dval.ul) == mLongLits.end()) {
- mLongLits[dval.ul] = getNumLiterals();
- }
- return mLongLits[dval.ul];
-}
-
-uint32_t
-AMDILMachineFunctionInfo::getIntLits(uint32_t offset)
-{
- return mIntLits[offset];
+ return addLiteral(dval.ul, dval.ul);
}
-
uint32_t
-AMDILMachineFunctionInfo::getLongLits(uint64_t offset)
+AMDILMachineFunctionInfo::getLitIdx(uint32_t val)
{
- return mLongLits[offset];
+ uint64_t val64 = ((uint64_t)val | ((uint64_t)val << 32));
+ return mLits[std::pair<uint64_t, uint64_t>(val64, val64)];
}
-
uint32_t
-AMDILMachineFunctionInfo::getVecLits(uint64_t low64, uint64_t high64)
+AMDILMachineFunctionInfo::getLitIdx(uint64_t val)
{
- return mVecLits[std::pair<uint64_t, uint64_t>(low64, high64)];
+ return mLits[std::pair<uint64_t, uint64_t>(val, val)];
}
-
size_t
-AMDILMachineFunctionInfo::getNumLiterals() const
-{
- return mLongLits.size() + mIntLits.size() + mVecLits.size() + mReservedLits;
+AMDILMachineFunctionInfo::getNumLiterals() const {
+ return mLits.size() + mReservedLits;
}
-
void
AMDILMachineFunctionInfo::addReservedLiterals(uint32_t size)
{
mReservedLits += size;
}
-
uint32_t
AMDILMachineFunctionInfo::addSampler(std::string name, uint32_t val)
{
@@ -533,39 +479,33 @@
return curVal.idx;
}
}
-
void
-AMDILMachineFunctionInfo::setUsesMem(unsigned id)
-{
+AMDILMachineFunctionInfo::setUsesMem(unsigned id) {
assert(id < AMDILDevice::MAX_IDS &&
"Must set the ID to be less than MAX_IDS!");
mUsedMem[id] = true;
}
-
bool
-AMDILMachineFunctionInfo::usesMem(unsigned id)
-{
+AMDILMachineFunctionInfo::usesMem(unsigned id) {
assert(id < AMDILDevice::MAX_IDS &&
"Must set the ID to be less than MAX_IDS!");
return mUsedMem[id];
}
-
void
AMDILMachineFunctionInfo::addErrorMsg(const char *msg, ErrorMsgEnum val)
{
if (val == DEBUG_ONLY) {
-#if defined(DEBUG) || defined(_DEBUG)
+#if !defined(NDEBUG)
mErrors.insert(msg);
#endif
} else if (val == RELEASE_ONLY) {
-#if !defined(DEBUG) && !defined(_DEBUG)
+#if defined(NDEBUG)
mErrors.insert(msg);
#endif
} else if (val == ALWAYS) {
mErrors.insert(msg);
}
}
-
uint32_t
AMDILMachineFunctionInfo::addPrintfString(std::string &name, unsigned offset)
{
@@ -578,21 +518,18 @@
return info->getPrintfID();
}
}
-
void
AMDILMachineFunctionInfo::addPrintfOperand(std::string &name,
- size_t idx,
- uint32_t size)
+ size_t idx,
+ uint32_t size)
{
mPrintfMap[name]->addOperand(idx, size);
}
-
void
AMDILMachineFunctionInfo::addMetadata(const char *md, bool kernelOnly)
{
addMetadata(std::string(md), kernelOnly);
}
-
void
AMDILMachineFunctionInfo::addMetadata(std::string md, bool kernelOnly)
{
@@ -602,7 +539,6 @@
mMetadataFunc.insert(md);
}
}
-
size_t
AMDILMachineFunctionInfo::get_num_write_images()
{
@@ -610,7 +546,6 @@
+ write_image2d_array_size() + write_image1d_array_size()
+ write_image1d_size() + write_image1d_buffer_size();
}
-
bool
AMDILMachineFunctionInfo::isSignedIntType(const Value* ptr)
{
@@ -624,7 +559,8 @@
if (!GV || !GV->hasInitializer()) return false;
const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!CA) return false;
- for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) {
+ for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop;
+ ++start) {
const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(start));
if (!nameField) continue;
@@ -655,7 +591,8 @@
if (!GV || !GV->hasInitializer()) return false;
const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!CA) return false;
- for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) {
+ for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop;
+ ++start) {
const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(start));
if (!nameField) continue;
@@ -686,7 +623,8 @@
if (!GV || !GV->hasInitializer()) return false;
const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!CA) return false;
- for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) {
+ for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop;
+ ++start) {
const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(start));
if (!nameField) continue;
@@ -704,7 +642,6 @@
}
return false;
}
-
bool
AMDILMachineFunctionInfo::isConstantArgument(const Value* ptr)
{
@@ -718,7 +655,8 @@
if (!GV || !GV->hasInitializer()) return false;
const ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
if (!CA) return false;
- for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) {
+ for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop;
+ ++start) {
const ConstantExpr *nameField = dyn_cast<ConstantExpr>(CA->getOperand(start));
if (!nameField) continue;
@@ -736,4 +674,3 @@
}
return false;
}
-
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachineFunctionInfo.h Wed Sep 12 12:43:34 2012
@@ -30,34 +30,32 @@
namespace llvm
{
class AMDILSubtarget;
-class PrintfInfo
-{
- uint32_t mPrintfID;
- SmallVector<uint32_t, DEFAULT_VEC_SLOTS> mOperands;
+class PrintfInfo {
+uint32_t mPrintfID;
+SmallVector<uint32_t, DEFAULT_VEC_SLOTS> mOperands;
public:
void addOperand(size_t idx, uint32_t size);
uint32_t getPrintfID();
void setPrintfID(uint32_t idx);
size_t getNumOperands();
uint32_t getOperandID(uint32_t idx);
-}; // class PrintfInfo
+}; // class PrintfInfo
-enum NameDecorationStyle {
+enum NameDecorationStyle
+{
None,
StdCall,
FastCall
};
typedef struct SamplerInfoRec {
- std::string name; // The name of the sampler
- uint32_t val; // The value of the sampler
- uint32_t idx; // The sampler resource id
+ std::string name; // The name of the sampler
+ uint32_t val; // The value of the sampler
+ uint32_t idx; // The sampler resource id
} SamplerInfo;
// Some typedefs that will help with using the various iterators
// of the machine function info class.
-typedef std::map<uint32_t, uint32_t>::iterator lit32_iterator;
-typedef std::map<uint64_t, uint32_t>::iterator lit64_iterator;
typedef std::map<std::pair<uint64_t, uint64_t>, uint32_t>::iterator
-lit128_iterator;
+lit_iterator;
typedef StringMap<SamplerInfo>::iterator sampler_iterator;
typedef DenseSet<uint32_t>::iterator func_iterator;
typedef DenseSet<uint32_t>::iterator intr_iterator;
@@ -85,133 +83,126 @@
// amdil target-specific information for each MachineFunction
class AMDILMachineFunctionInfo : public MachineFunctionInfo
{
- // CalleeSavedFrameSize - Size of the callee-saved
- // register portion of the
- // stack frame in bytes.
- unsigned int CalleeSavedFrameSize;
- // BytesToPopOnReturn - Number of bytes function pops on return.
- // Used on windows platform for stdcall & fastcall name decoration
- unsigned int BytesToPopOnReturn;
- // DecorationStyle - If the function requires additional
- // name decoration,
- // DecorationStyle holds the right way to do so.
- NameDecorationStyle DecorationStyle;
- // ReturnAddrIndex - FrameIndex for return slot.
- int ReturnAddrIndex;
-
- // TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
- // Used for creating an area before the register spill area
- // on the stack
- // the returnaddr can be savely move to this area
- int TailCallReturnAddrDelta;
-
- // SRetReturnReg - Some subtargets require that sret lowering includes
- // returning the value of the returned struct in a register.
- // This field holds the virtual register into which the sret
- // argument is passed.
- unsigned int SRetReturnReg;
-
- // The size in bytes required to host all of the kernel arguments.
- // -1 means this value has not been determined yet.
- int32_t mArgSize;
-
- // The size in bytes required to host the stack and the kernel arguments
- // in private memory.
- // -1 means this value has not been determined yet.
- int32_t mScratchSize;
-
- // The size in bytes required to host the the kernel arguments
- // on the stack.
- // -1 means this value has not been determined yet.
- int32_t mStackSize;
-
- /// A map of constant to literal mapping for all of the 32bit or
- /// smaller literals in the current function.
- std::map<uint32_t, uint32_t> mIntLits;
-
- /// A map of constant to literal mapping for all of the 64bit
- /// literals in the current function.
- std::map<uint64_t, uint32_t> mLongLits;
-
- /// A map of constant to literal mapping for all of the 128bit
- /// literals in the current function.
- std::map<std::pair<uint64_t, uint64_t>, uint32_t> mVecLits;
-
- /// The number of literals that should be reserved.
- /// TODO: Remove this when the wrapper emitter is added.
- uint32_t mReservedLits;
-
- /// A map of name to sampler information that is used to emit
- /// metadata to the IL stream that the runtimes can use for
- /// hardware setup.
- StringMap<SamplerInfo> mSamplerMap;
-
- /// Array of flags to specify if a specific memory type is used or not.
- bool mUsedMem[AMDILDevice::MAX_IDS];
-
- /// Set of all functions that this function calls.
- DenseSet<uint32_t> mFuncs;
-
- /// Set of all intrinsics that this function calls.
- DenseSet<uint32_t> mIntrs;
-
- /// Set of all write only 1D images.
- DenseSet<uint32_t> mWO1D;
- /// Set of all read only 1D images.
- DenseSet<uint32_t> mRO1D;
- /// Set of all write only 1D image arrays.
- DenseSet<uint32_t> mWO1DA;
- /// Set of all read only 1D image arrays.
- DenseSet<uint32_t> mRO1DA;
- /// Set of all write only 1D image buffers.
- DenseSet<uint32_t> mWO1DB;
- /// Set of all read only 1D image buffers.
- DenseSet<uint32_t> mRO1DB;
- /// Set of all write only 2D images.
- DenseSet<uint32_t> mWO2D;
- /// Set of all read only 2D images.
- DenseSet<uint32_t> mRO2D;
- /// Set of all write only 2D image arrays.
- DenseSet<uint32_t> mWO2DA;
- /// Set of all read only 2D image arrays.
- DenseSet<uint32_t> mRO2DA;
- /// Set of all read only 3D images.
- DenseSet<uint32_t> mRO3D;
- /// Set of all write only 3D images.
- DenseSet<uint32_t> mWO3D;
- /// Set of all the raw uavs.
- DenseSet<uint32_t> mRawUAV;
- /// Set of all the arena uavs.
- DenseSet<uint32_t> mArenaUAV;
-
- /// Set of all semaphores
- DenseSet<uint32_t> mSemaphore;
-
- /// Set of all the read-only pointers
- DenseSet<const Value*> mReadPtr;
-
- /// A set of all errors that occured in the backend for this function.
- DenseSet<const char *> mErrors;
-
- /// A mapping of printf data and the printf string
- std::map<std::string, PrintfInfo*> mPrintfMap;
-
- /// A set of all of the metadata that is used for the current function.
- std::set<std::string> mMetadataFunc;
-
- /// A set of all of the metadata that is used for the function wrapper.
- std::vector<std::string> mMetadataKernel;
-
- SmallVector<unsigned, 16> mArgRegs;
+// CalleeSavedFrameSize - Size of the callee-saved
+// register portion of the
+// stack frame in bytes.
+unsigned int CalleeSavedFrameSize;
+// BytesToPopOnReturn - Number of bytes function pops on return.
+// Used on windows platform for stdcall & fastcall name decoration
+unsigned int BytesToPopOnReturn;
+// DecorationStyle - If the function requires additional
+// name decoration,
+// DecorationStyle holds the right way to do so.
+NameDecorationStyle DecorationStyle;
+// ReturnAddrIndex - FrameIndex for return slot.
+int ReturnAddrIndex;
+
+// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved
+// Used for creating an area before the register spill area
+// on the stack
+// the returnaddr can be savely move to this area
+int TailCallReturnAddrDelta;
+
+// SRetReturnReg - Some subtargets require that sret lowering includes
+// returning the value of the returned struct in a register.
+// This field holds the virtual register into which the sret
+// argument is passed.
+unsigned int SRetReturnReg;
+
+// The size in bytes required to host all of the kernel arguments.
+// -1 means this value has not been determined yet.
+int32_t mArgSize;
+
+// The size in bytes required to host the stack and the kernel arguments
+// in private memory.
+// -1 means this value has not been determined yet.
+int32_t mScratchSize;
+
+// The size in bytes required to host the the kernel arguments
+// on the stack.
+// -1 means this value has not been determined yet.
+int32_t mStackSize;
+
+/// A map of constant to literal mapping for all of the 128bit
+/// literals in the current function.
+std::map<std::pair<uint64_t, uint64_t>, uint32_t> mLits;
+uint32_t addLiteral(uint64_t val_lo, uint64_t val_hi);
+
+/// The number of literals that should be reserved.
+/// TODO: Remove this when the wrapper emitter is added.
+uint32_t mReservedLits;
+
+/// A map of name to sampler information that is used to emit
+/// metadata to the IL stream that the runtimes can use for
+/// hardware setup.
+StringMap<SamplerInfo> mSamplerMap;
+
+/// Array of flags to specify if a specific memory type is used or not.
+bool mUsedMem[AMDILDevice::MAX_IDS];
+
+/// Set of all functions that this function calls.
+DenseSet<uint32_t> mFuncs;
+
+/// Set of all intrinsics that this function calls.
+DenseSet<uint32_t> mIntrs;
+
+/// Set of all write only 1D images.
+DenseSet<uint32_t> mWO1D;
+/// Set of all read only 1D images.
+DenseSet<uint32_t> mRO1D;
+/// Set of all write only 1D image arrays.
+DenseSet<uint32_t> mWO1DA;
+/// Set of all read only 1D image arrays.
+DenseSet<uint32_t> mRO1DA;
+/// Set of all write only 1D image buffers.
+DenseSet<uint32_t> mWO1DB;
+/// Set of all read only 1D image buffers.
+DenseSet<uint32_t> mRO1DB;
+/// Set of all write only 2D images.
+DenseSet<uint32_t> mWO2D;
+/// Set of all read only 2D images.
+DenseSet<uint32_t> mRO2D;
+/// Set of all write only 2D image arrays.
+DenseSet<uint32_t> mWO2DA;
+/// Set of all read only 2D image arrays.
+DenseSet<uint32_t> mRO2DA;
+/// Set of all read only 3D images.
+DenseSet<uint32_t> mRO3D;
+/// Set of all write only 3D images.
+DenseSet<uint32_t> mWO3D;
+/// Set of all the raw uavs.
+DenseSet<uint32_t> mRawUAV;
+/// Set of all the arena uavs.
+DenseSet<uint32_t> mArenaUAV;
+
+/// Set of all semaphores
+DenseSet<uint32_t> mSemaphore;
+
+/// Set of all the read-only pointers
+DenseSet<const Value*> mReadPtr;
+
+/// A set of all errors that occured in the backend for this function.
+DenseSet<const char *> mErrors;
+
+/// A mapping of printf data and the printf string
+std::map<std::string, PrintfInfo*> mPrintfMap;
+
+/// A set of all of the metadata that is used for the current function.
+std::set<std::string> mMetadataFunc;
+
+/// A set of all of the metadata that is used for the function wrapper.
+std::vector<std::string> mMetadataKernel;
+
+SmallVector<unsigned, 16> mArgRegs;
- /// Information about the kernel, NULL if the function is not a kernel.
- AMDILKernel *mKernel;
+/// Information about the kernel, NULL if the function is not a kernel.
+AMDILKernel *mKernel;
- /// Pointer to the machine function that this information belongs to.
- MachineFunction *mMF;
+/// Pointer to the machine function that this information belongs to.
+MachineFunction *mMF;
- /// Pointer to the subtarget for this function.
- const AMDILSubtarget *mSTM;
+/// Pointer to the subtarget for this function.
+const AMDILSubtarget *mSTM;
public:
AMDILMachineFunctionInfo();
AMDILMachineFunctionInfo(MachineFunction &MF);
@@ -247,14 +238,15 @@
setSRetReturnReg(unsigned int Reg);
#define AS_SET_GET(A) \
- private: \
- bool Uses##A;\
- bool A##Arg; \
- public: \
- void setUses##A() { Uses##A = true; }\
- bool uses##A() const { return Uses##A; }\
- void setHas##A##Arg() { A##Arg = true; setUses##A(); }\
- bool has##A##Arg() { return A##Arg; }
+ private: \
+ bool Uses ## A; \
+ bool A ## Arg; \
+ public: \
+ void setUses ## A() { Uses ## A = true; \
+ } \
+ bool uses ## A() const { return Uses ## A; } \
+ void setHas ## A ## Arg() { A ## Arg = true; setUses ## A(); } \
+ bool has ## A ## Arg() { return A ## Arg; }
AS_SET_GET(LDS)
AS_SET_GET(GDS)
@@ -294,7 +286,7 @@
/// to the literal to integer and integer to literal mappings.
///
/// Add a 32bit integer value to the literal table.
- uint32_t addi32Literal(uint32_t val, int Opcode = AMDIL::LOADCONST_i32);
+ uint32_t addi32Literal(uint32_t val, int Opcode = AMDIL::LOADCONSTi32);
/// Add a 32bit floating point value to the literal table.
uint32_t addf32Literal(const ConstantFP *CFP);
@@ -318,41 +310,22 @@
size_t getNumLiterals() const;
/// Get the literal ID of an Integer literal of the given offset.
- uint32_t getIntLits(uint32_t lit);
-
- /// Get the literal ID of a Long literal of the given offset.
- uint32_t getLongLits(uint64_t lit);
+ uint32_t getLitIdx(uint32_t lit);
/// Get the literal ID of a Long literal of the given offset.
- uint32_t getVecLits(uint64_t low64, uint64_t high64);
+ uint32_t getLitIdx(uint64_t lit);
/// Add some literals to the number of reserved literals.
void addReservedLiterals(uint32_t);
// Functions that return iterators to the beginning and end
// of the various literal maps.
- // Functions that return the beginning and end of the 32bit literal map
- lit32_iterator begin_32() {
- return mIntLits.begin();
+ // Functions that return the beginning and end of the literal map
+ lit_iterator lit_begin() {
+ return mLits.begin();
}
- lit32_iterator end_32() {
- return mIntLits.end();
- }
-
- // Functions that return the beginning and end of the 64bit literal map
- lit64_iterator begin_64() {
- return mLongLits.begin();
- }
- lit64_iterator end_64() {
- return mLongLits.end();
- }
-
- // Functions that return the beginning and end of the 2x64bit literal map
- lit128_iterator begin_128() {
- return mVecLits.begin();
- }
- lit128_iterator end_128() {
- return mVecLits.end();
+ lit_iterator lit_end() {
+ return mLits.end();
}
// Add a sampler to the set of known samplers for the current kernel.
@@ -366,7 +339,6 @@
return mSamplerMap.end();
}
-
/// Set the flag for the memory ID to true for the current function.
void setUsesMem(unsigned);
/// Retrieve the flag for the memory ID.
@@ -649,9 +621,9 @@
// Add an error to the output for the current function.
typedef enum {
- RELEASE_ONLY, /// Only emit error message in release mode.
- DEBUG_ONLY, /// Only emit error message in debug mode.
- ALWAYS /// Always emit the error message.
+ RELEASE_ONLY, /// Only emit error message in release mode.
+ DEBUG_ONLY, /// Only emit error message in debug mode.
+ ALWAYS /// Always emit the error message.
} ErrorMsgEnum;
/// Add an error message to the set of all error messages.
void addErrorMsg(const char* msg, ErrorMsgEnum val = ALWAYS);
Modified: llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp?rev=163727&r1=163726&r2=163727&view=diff
==============================================================================
--- llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp (original)
+++ llvm/branches/AMDILBackend/lib/Target/AMDIL/AMDILMachinePeephole.cpp Wed Sep 12 12:43:34 2012
@@ -21,59 +21,65 @@
#include "AMDIL.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+namespace llvm
+{
+extern void initializeAMDILMachinePeepholePass(llvm::PassRegistry&);
+}
using namespace llvm;
namespace
{
class AMDILMachinePeephole : public MachineFunctionPass
{
- typedef std::map<uint32_t, uint32_t> Reg2RegMap;
+typedef SmallVector<MachineBasicBlock*, 32> MachineBlockVec;
+typedef SmallVector<MachineInstr*, 4> MachineInstVec;
+typedef std::map<uint32_t, MachineInstVec*> Reg2InstsMap;
+
public:
static char ID;
- AMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL);
+ AMDILMachinePeephole();
// virtual ~AMDILMachinePeephole();
virtual const char*
getPassName() const;
virtual bool
runOnMachineFunction(MachineFunction &MF);
+ virtual void
+ getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
private:
void insertFence(MachineBasicBlock::iterator &MIB);
inline bool useSWByteShortReg(short opRegClassID);
inline uint32_t genVReg(uint32_t regType) const;
- inline MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst,
- MachineBasicBlock::iterator &MIB) const;
- inline MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst,
- uint32_t src1,
- MachineBasicBlock::iterator &MIB) const;
- inline MachineInstrBuilder
- generateMachineInst(uint32_t opcode,
- uint32_t dst,
- uint32_t src1,
- uint32_t src2,
- MachineBasicBlock::iterator &MIB) const;
- MachineInstr* findExtendInstruction(MachineOperand& op, bool isSigned);
+ bool findExtendSequence(MachineOperand& op, bool isSigned,
+ MachineInstVec& extendSeq);
void getExtendOpcodes(uint32_t regClassID,
bool isSigned,
- int64_t& constVal,
+ uint32_t& nConsts,
+ int64_t* constVal,
int& promoteOp,
int& demoteOp,
- int& binaryAndOp,
- int& shlOp,
- int& shrOp,
+ int* extendOps,
uint32_t& intRegClassID);
- uint32_t addExtendInstruction(MachineBasicBlock::iterator &MIB,
- uint32_t reg,
- bool isSigned);
+ uint32_t addExtendInstructions(MachineBasicBlock& BB,
+ MachineBasicBlock::instr_iterator I,
+ uint32_t reg,
+ bool isSigned,
+ MachineInstVec& extendSeq);
+ void moveInsts(MachineInstVec& insts,
+ MachineBasicBlock& BB,
+ MachineBasicBlock::instr_iterator I);
void extendOperand(MachineBasicBlock::iterator &MIB,
uint32_t opIdx,
bool isSigned);
@@ -83,41 +89,72 @@
void signExtend(MachineBasicBlock::iterator &MIB, uint32_t opIdx) {
extendOperand(MIB, opIdx, true);
}
-
- TargetMachine &TM;
+ const TargetMachine *TM;
+ const MachineDominatorTree *DomTree;
MachineFunction* MFP;
bool mDebug;
- // map from a register to its sign-extention
- Reg2RegMap sextMap;
- // map from a register to its zero-extention
- Reg2RegMap zextMap;
-}; // AMDILMachinePeephole
-char AMDILMachinePeephole::ID = 0;
+ // map from a register to its sign-extention sequence
+ Reg2InstsMap sextMap;
+ // map from a register to its zero-extention sequence
+ Reg2InstsMap zextMap;
+}; // AMDILMachinePeephole
} // anonymous namespace
+char AMDILMachinePeephole::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDILMachinePeephole, "amdil-machine-peephole",
+ "AMDIL Machine Peephole Optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(AMDILMachinePeephole, "amdil-machine-peephole",
+ "AMDIL Machine Peephole Optimization", false, false)
+
namespace llvm
{
FunctionPass*
-createAMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL)
+createAMDILMachinePeephole()
{
- return new AMDILMachinePeephole(tm, OL);
+ return new AMDILMachinePeephole();
}
} // llvm namespace
-AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm, CodeGenOpt::Level OL)
- : MachineFunctionPass(ID), TM(tm), MFP(NULL), sextMap(), zextMap()
+AMDILMachinePeephole::AMDILMachinePeephole()
+ : MachineFunctionPass(ID),
+ TM(NULL), DomTree(NULL), MFP(NULL), sextMap(), zextMap()
{
mDebug = DEBUGME;
+ initializeAMDILMachinePeepholePass(*PassRegistry::getPassRegistry());
}
+#define GENERATE_3ARG_CASE(A) \
+case A ## rrr: \
+case A ## irr: \
+case A ## rir: \
+case A ## iir: \
+case A ## rri: \
+case A ## iri: \
+case A ## rii: \
+case A ## iii:
+
+#define GENERATE_SH2ARG_CASE(A) \
+case A ## rr: \
+case A ## ri:
+
+#define GENERATE_2ARG_CASE(A) \
+case A ## rr: \
+case A ## ri: \
+case A ## ii:
+
+#define GENERATE_1ARG_CASE(A) \
+case A ## r: \
+case A ## i:
bool
AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF)
{
MFP = &MF;
- sextMap.clear();
- zextMap.clear();
+ TM = &MF.getTarget();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+
bool Changed = false;
- const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>();
+ const AMDILSubtarget *STM = &TM->getSubtarget<AMDILSubtarget>();
for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end();
MBB != MBE; ++MBB) {
MachineBasicBlock *mb = MBB;
@@ -126,220 +163,281 @@
MachineInstr *mi = MIB;
switch (mi->getOpcode()) {
default:
- if (isAtomicInst(TM,mi)) {
+ if (isAtomicInst(mi)) {
// If we don't support the hardware accellerated address spaces,
// then the atomic needs to be transformed to the global atomic.
- if (strstr(TM.getInstrInfo()->getName(mi->getOpcode()), "_L_")
+ if (strstr(TM->getInstrInfo()->getName(mi->getOpcode()), "_L_")
&& STM->device()->usesSoftware(AMDILDeviceInfo::LocalMem)) {
BuildMI(*mb, MIB, mi->getDebugLoc(),
- TM.getInstrInfo()->get(AMDIL::ADD_i32), AMDIL::R1011)
+ TM->getInstrInfo()->get(AMDIL::ADDi32rr), AMDIL::R1011)
.addReg(mi->getOperand(1).getReg())
.addReg(AMDIL::T2);
mi->getOperand(1).setReg(AMDIL::R1011);
mi->setDesc(
- TM.getInstrInfo()->get(
+ TM->getInstrInfo()->get(
(mi->getOpcode() - AMDIL::ATOM_L_ADD) + AMDIL::ATOM_G_ADD));
- } else if (strstr(TM.getInstrInfo()->getName(mi->getOpcode()), "_R_")
- && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem)) {
+ } else if (strstr(TM->getInstrInfo()->getName(mi->getOpcode()), "_R_")
+ && STM->device()->usesSoftware(AMDILDeviceInfo::RegionMem))
+ {
assert(!"Software region memory is not supported!");
mi->setDesc(
- TM.getInstrInfo()->get(
+ TM->getInstrInfo()->get(
(mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD));
}
- } else if ((isLoadInst(TM, mi) || isStoreInst(TM, mi)) && isVolatileInst(mi)) {
+ } else if (isVolatileInst(mi) &&
+ (isPtrLoadInst(mi) || isPtrStoreInst(mi))) {
insertFence(MIB);
}
continue;
break;
- // Implement software emulated i8/i16 types by sign/zero extending
- // i8/i16 type operands of instructions.
- // To avoid generating too many sign/zero extensions, we do this only
- // where its needed:
- // sign/zero-extend i8/i16 type operands if the bits in the
- // upper bits affects the result of the instruction
- ExpandCaseToByteShortScalarTypes(AMDIL::CONTINUEC)
- ExpandCaseToByteShortScalarTypes(AMDIL::BREAKC)
- // ExpandCaseToByteShortScalarTypes(AMDIL::BRANCH_COND)
- ExpandCaseToByteShortScalarTypes(AMDIL::IFC) {
- // we are not generating the above currently:
- assert(0 && "unexpected instruction");
- break;
- }
- ExpandCaseToByteShortScalarTypes(AMDIL::BREAK_LOGICALNZ)
- ExpandCaseToByteShortScalarTypes(AMDIL::BREAK_LOGICALZ)
- ExpandCaseToByteShortScalarTypes(AMDIL::CONTINUE_LOGICALNZ)
- ExpandCaseToByteShortScalarTypes(AMDIL::CONTINUE_LOGICALZ)
- ExpandCaseToByteShortScalarTypes(AMDIL::IF_LOGICALNZ)
- ExpandCaseToByteShortScalarTypes(AMDIL::IF_LOGICALZ) {
- short opRegClassID = mi->getDesc().OpInfo[0].RegClass;
- if (useSWByteShortReg(opRegClassID)) {
- zeroExtend(MIB, 0);
- }
- break;
+ // Implement software emulated i8/i16 types by sign/zero extending
+ // i8/i16 type operands of instructions.
+ // To avoid generating too many sign/zero extensions, we do this only
+ // where its needed:
+ // sign/zero-extend i8/i16 type operands if the bits in the
+ // upper bits affects the result of the instruction
+ case AMDIL::CONTINUECi8rr:
+ case AMDIL::CONTINUECi16rr:
+ case AMDIL::BREAKCi8rr:
+ case AMDIL::BREAKCi16rr:
+ case AMDIL::IFCi8rr:
+ case AMDIL::IFCi16rr:
+ {
+ // we are not generating the above currently:
+ assert(0 && "unexpected instruction");
+ break;
+ }
+ case AMDIL::BREAK_LOGICALNZi16r:
+ case AMDIL::BREAK_LOGICALNZi8r:
+ case AMDIL::BREAK_LOGICALZi16r:
+ case AMDIL::BREAK_LOGICALZi8r:
+ case AMDIL::CONTINUE_LOGICALNZi16r:
+ case AMDIL::CONTINUE_LOGICALNZi8r:
+ case AMDIL::CONTINUE_LOGICALZi16r:
+ case AMDIL::CONTINUE_LOGICALZi8r:
+ case AMDIL::IF_LOGICALNZi16r:
+ case AMDIL::IF_LOGICALNZi8r:
+ case AMDIL::IF_LOGICALZi16r:
+ case AMDIL::IF_LOGICALZi8r:
+ {
+ short opRegClassID = mi->getDesc().OpInfo[0].RegClass;
+ if (useSWByteShortReg(opRegClassID)) {
+ zeroExtend(MIB, 0);
}
- ExpandCaseToByteShortScalarTypes(AMDIL::SELECTBIN)
- ExpandCaseToByteShortTypes(AMDIL::CMOVLOG)
- ExpandCaseToByteShortTypes(AMDIL::CMOV)
- // ExpandCaseToByteShortTypes(AMDIL::EADD)
- // find first hi/low bit
- ExpandCaseToByteShortTypes(AMDIL::IFFB_HI)
- ExpandCaseToByteShortTypes(AMDIL::IFFB_LO)
- ExpandCaseToByteShortTypes(AMDIL::USHR)
- ExpandCaseToByteShortTypes(AMDIL::USHRVEC) {
+ break;
+ }
+ GENERATE_SH2ARG_CASE(AMDIL::USHRi8i8)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv2i8i8)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv4i8i8)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRi8i32)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv2i8i32)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv4i8i32)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRi8i64)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv2i8i64)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRi16i16)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv2i16i16)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv4i16i16)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRi16i32)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv2i16i32)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv4i16i32)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRi16i64)
+ GENERATE_SH2ARG_CASE(AMDIL::USHRv2i16i64)
+ {
short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
if (useSWByteShortReg(opRegClassID)) {
zeroExtend(MIB, 1);
}
break;
}
- ExpandCaseToByteShortTypes(AMDIL::NEGATE)
- ExpandCaseToByteShortTypes(AMDIL::SHR)
- ExpandCaseToByteShortTypes(AMDIL::SHRVEC) {
+ case AMDIL::NEGi8r:
+ case AMDIL::NEGi16r:
+ case AMDIL::NEGv2i8r:
+ case AMDIL::NEGv2i16r:
+ case AMDIL::NEGv4i8r:
+ case AMDIL::NEGv4i16r:
+ case AMDIL::NOTi8r:
+ case AMDIL::NOTi16r:
+ case AMDIL::NOTv2i8r:
+ case AMDIL::NOTv2i16r:
+ case AMDIL::NOTv4i8r:
+ case AMDIL::NOTv4i16r:
+ GENERATE_SH2ARG_CASE(AMDIL::SHRi8i8)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv2i8i8)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv4i8i8)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRi8i32)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv2i8i32)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv4i8i32)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRi8i64)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv2i8i64)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRi16i16)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv2i16i16)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv4i16i16)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRi16i32)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv2i16i32)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv4i16i32)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRi16i64)
+ GENERATE_SH2ARG_CASE(AMDIL::SHRv2i16i64)
+ {
short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
if (useSWByteShortReg(opRegClassID)) {
signExtend(MIB, 1);
}
break;
}
- ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__sdiv)
- ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__smod)
- ExpandCaseToByteShortTypes(AMDIL::DIV_INF)
- ExpandCaseToByteShortTypes(AMDIL::SMAX)
- ExpandCaseToByteShortTypes(AMDIL::SMULHI)
- ExpandCaseToByteShortTypes(AMDIL::SMUL) {
+ GENERATE_2ARG_CASE(AMDIL::SMULi8)
+ GENERATE_2ARG_CASE(AMDIL::SMULv2i8)
+ GENERATE_2ARG_CASE(AMDIL::SMULv4i8)
+ GENERATE_2ARG_CASE(AMDIL::SMULi16)
+ GENERATE_2ARG_CASE(AMDIL::SMULv2i16)
+ GENERATE_2ARG_CASE(AMDIL::SMULv4i16)
+ GENERATE_2ARG_CASE(AMDIL::SMULHIi8)
+ GENERATE_2ARG_CASE(AMDIL::SMULHIv2i8)
+ GENERATE_2ARG_CASE(AMDIL::SMULHIv4i8)
+ GENERATE_2ARG_CASE(AMDIL::SMULHIi16)
+ GENERATE_2ARG_CASE(AMDIL::SMULHIv2i16)
+ GENERATE_2ARG_CASE(AMDIL::SMULHIv4i16)
+ case AMDIL::MACRO__sdiv_i16:
+ case AMDIL::MACRO__smod_i8:
+ case AMDIL::MACRO__smod_i16:
+#define CMPEXP(A) \
+ GENERATE_2ARG_CASE(A ## i8) \
+ GENERATE_2ARG_CASE(A ## v2i8) \
+ GENERATE_2ARG_CASE(A ## v4i8) \
+ GENERATE_2ARG_CASE(A ## i16) \
+ GENERATE_2ARG_CASE(A ## v2i16) \
+ GENERATE_2ARG_CASE(A ## v4i16)
+ CMPEXP(AMDIL::EQ)
+ CMPEXP(AMDIL::NE)
+ CMPEXP(AMDIL::LT)
+ CMPEXP(AMDIL::GT)
+ CMPEXP(AMDIL::ULT)
+ CMPEXP(AMDIL::UGT)
+ CMPEXP(AMDIL::LE)
+ CMPEXP(AMDIL::GE)
+ CMPEXP(AMDIL::ULE)
+#undef CMPEXP
+ {
short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
- assert(opRegClassID == mi->getDesc().OpInfo[2].RegClass
- && "instruction ops have different type");
if (useSWByteShortReg(opRegClassID)) {
signExtend(MIB, 1);
signExtend(MIB, 2);
}
break;
}
- ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__udiv)
- ExpandCaseToByteShortScalarTypes(AMDIL::MACRO__umod)
- ExpandCaseToByteShortTypes(AMDIL::UDIV)
- ExpandCaseToByteShortTypes(AMDIL::UMULHI) {
+ case AMDIL::MACRO__udiv_i8:
+ case AMDIL::MACRO__udiv_i16:
+ case AMDIL::MACRO__umod_i8:
+ case AMDIL::MACRO__umod_i16:
+ GENERATE_2ARG_CASE(AMDIL::UMULHIi8)
+ GENERATE_2ARG_CASE(AMDIL::UMULHIv2i8)
+ GENERATE_2ARG_CASE(AMDIL::UMULHIv4i8)
+ GENERATE_2ARG_CASE(AMDIL::UMULHIi16)
+ GENERATE_2ARG_CASE(AMDIL::UMULHIv2i16)
+ GENERATE_2ARG_CASE(AMDIL::UMULHIv4i16)
+ GENERATE_2ARG_CASE(AMDIL::UDIVi8)
+ GENERATE_2ARG_CASE(AMDIL::UDIVv2i8)
+ GENERATE_2ARG_CASE(AMDIL::UDIVv4i8)
+ GENERATE_2ARG_CASE(AMDIL::UDIVi16)
+ GENERATE_2ARG_CASE(AMDIL::UDIVv2i16)
+ GENERATE_2ARG_CASE(AMDIL::UDIVv4i16)
+ {
short opRegClassID = mi->getDesc().OpInfo[1].RegClass;
- assert(opRegClassID == mi->getDesc().OpInfo[2].RegClass
- && "instruction ops have different type");
if (useSWByteShortReg(opRegClassID)) {
zeroExtend(MIB, 1);
zeroExtend(MIB, 2);
}
break;
}
- // This works around a restriction in AMDIL where the
- // result of a comparison can only be in the lower
- // 2 components.
- case AMDIL::LEQ:
- case AMDIL::LGE:
- case AMDIL::LLE:
- case AMDIL::LGT:
- case AMDIL::LLT:
- case AMDIL::LNE:
- case AMDIL::ULLE:
- case AMDIL::ULGT:
- case AMDIL::ULGE:
- case AMDIL::ULLT: {
- if (isZWComponentReg(mi->getOperand(0).getReg())) {
- MachineInstr *newmi = BuildMI(MF, mi->getDebugLoc(),
- TM.getInstrInfo()->get(AMDIL::MOVE_i64),
- mi->getOperand(0).getReg()).addReg(AMDIL::Rxy1000);
- mi->getOperand(0).setReg(AMDIL::Rxy1000);
- mi->getParent()->insertAfter(MIB, newmi);
- }
- }
- break;
}
}
}
+
+ // cleanup
+ for (Reg2InstsMap::iterator I = sextMap.begin(), E = sextMap.end();
+ I != E; ++I) {
+ MachineInstVec* vec = I->second;
+ delete vec;
+ }
+ sextMap.clear();
+ for (Reg2InstsMap::iterator I = zextMap.begin(), E = zextMap.end();
+ I != E; ++I) {
+ MachineInstVec* vec = I->second;
+ delete vec;
+ }
+ zextMap.clear();
return Changed;
}
+#undef GENERATE_3ARG_CASE
+#undef GENERATE_SH2ARG_CASE
+#undef GENERATE_2ARG_CASE
const char*
AMDILMachinePeephole::getPassName() const
{
return "AMDIL Generic Machine Peephole Optimization Pass";
}
-
void
AMDILMachinePeephole::insertFence(MachineBasicBlock::iterator &MIB)
{
MachineInstr *MI = MIB;
MachineInstr *fence = BuildMI(*(MI->getParent()->getParent()),
MI->getDebugLoc(),
- TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+ TM->getInstrInfo()->get(AMDIL::FENCEr)).addReg(
+ 1);
MI->getParent()->insert(MIB, fence);
fence = BuildMI(*(MI->getParent()->getParent()),
MI->getDebugLoc(),
- TM.getInstrInfo()->get(AMDIL::FENCE)).addReg(1);
+ TM->getInstrInfo()->get(AMDIL::FENCEr)).addReg(1);
MIB = MI->getParent()->insertAfter(MIB, fence);
}
-
// returns if the given register class is software emulated byte or short
bool AMDILMachinePeephole::useSWByteShortReg(short opRegClassID)
{
if ((opRegClassID == AMDIL::GPRI16RegClassID
|| opRegClassID == AMDIL::GPRV2I16RegClassID
|| opRegClassID == AMDIL::GPRV4I16RegClassID)
- && TM.getSubtarget<AMDILSubtarget>()
+ && TM->getSubtarget<AMDILSubtarget>()
.device()->usesSoftware(AMDILDeviceInfo::ShortOps)) {
return true;
}
if ((opRegClassID == AMDIL::GPRI8RegClassID
|| opRegClassID == AMDIL::GPRV2I8RegClassID
|| opRegClassID == AMDIL::GPRV4I8RegClassID)
- && TM.getSubtarget<AMDILSubtarget>()
+ && TM->getSubtarget<AMDILSubtarget>()
.device()->usesSoftware(AMDILDeviceInfo::ByteOps)) {
return true;
}
return false;
}
-
-uint32_t AMDILMachinePeephole::genVReg(uint32_t regType) const
-{
+uint32_t AMDILMachinePeephole::genVReg(uint32_t regType) const {
return MFP->getRegInfo().createVirtualRegister(getRegClassFromID(regType));
}
-
-MachineInstrBuilder
-AMDILMachinePeephole::generateMachineInst(uint32_t opcode,
- uint32_t dst,
- MachineBasicBlock::iterator &MIB)
-const
-{
- MachineInstr* mi = MIB;
- MachineBasicBlock* mb = mi->getParent();
- return BuildMI(*mb, MIB, mi->getDebugLoc(),
- TM.getInstrInfo()->get(opcode), dst);
-}
-
-MachineInstrBuilder
-AMDILMachinePeephole::generateMachineInst(uint32_t opcode,
- uint32_t dst,
- uint32_t src1,
- MachineBasicBlock::iterator &MIB)
-const
-{
- return generateMachineInst(opcode, dst, MIB).addReg(src1);
-}
-
-MachineInstrBuilder
-AMDILMachinePeephole::generateMachineInst(uint32_t opcode,
- uint32_t dst,
- uint32_t src1,
- uint32_t src2,
- MachineBasicBlock::iterator &MIB)
-const
+// Find a MachineInstr that uses the given register and has the given opcode.
+// Return NULL if not found.
+static inline MachineInstr* findRegUse(int opcode, uint32_t reg,
+ const MachineRegisterInfo& MRI)
{
- return generateMachineInst(opcode, dst, src1, MIB).addReg(src2);
+ for (MachineRegisterInfo::use_iterator it = MRI.use_begin(reg),
+ end = MRI.use_end();
+ it != end;
+ ++it) {
+ MachineInstr& useInst = *it;
+ if (useInst.getOpcode() != opcode) continue;
+ assert(useInst.getNumOperands() == 2 && "unexpected # of operands");
+ MachineOperand& op = useInst.getOperand(1);
+ if (op.isReg() && op.getReg() == reg) {
+ assert(op.isUse() && "op not use");
+ return &useInst;
+ }
+ }
+ return NULL;
}
-
-// Find a MachineInstr that uses the given register and has the given opcode.
+// Find a MachineInstr that uses the given register and immediate,
+// and has the given opcode.
// Return NULL if not found.
-static inline MachineInstr* findRegUse(uint32_t reg, int opcode,
+static inline MachineInstr* findRegUse(int opcode, uint32_t reg, int64_t imm,
const MachineRegisterInfo& MRI)
{
for (MachineRegisterInfo::use_iterator it = MRI.use_begin(reg),
@@ -347,24 +445,22 @@
it != end;
++it) {
MachineInstr& useInst = *it;
- if (useInst.getOpcode() == opcode) {
- for (MachineInstr::mop_iterator it2 = useInst.operands_begin(),
- end2 = useInst.operands_end();
- it2 != end2; ++it2) {
- MachineOperand& op = *it2;
- if (op.isUse() && op.isReg() && op.getReg() == reg) {
- return &useInst;
- }
- }
+ if (useInst.getOpcode() != opcode) continue;
+ assert(useInst.getNumOperands() == 3 && "unexpected # of operands");
+ MachineOperand& op1 = useInst.getOperand(1);
+ MachineOperand& op2 = useInst.getOperand(2);
+ assert(op1.isReg() && op1.isUse() && op2.isImm() && "unexpected op");
+ if (op1.getReg() == reg && op2.getImm() == imm) {
+ return &useInst;
}
}
return NULL;
}
-
// Find a MachineInstr that uses the given register and immediate,
// and has the given opcode.
// Return NULL if not found.
// This overload matches the (imm, imm, reg) operand layout used by the
// BIT_EXTRACT-style opcodes: exactly 4 operands as (def, imm1, imm2, reg-use),
// which is asserted below.
static inline MachineInstr* findRegUse(int opcode, int64_t imm1,
                                       int64_t imm2, uint32_t reg,
                                       const MachineRegisterInfo& MRI)
{
  // Walk every use of "reg"; cheap opcode filter first, then check all
  // three source operands against (imm1, imm2, reg).
  for (MachineRegisterInfo::use_iterator it = MRI.use_begin(reg),
       end = MRI.use_end(); // NOTE(review): line elided by the diff hunk; restored from the identical loop in the sibling overload
       it != end;
       ++it) {
    MachineInstr& useInst = *it;
    if (useInst.getOpcode() != opcode) continue;
    assert(useInst.getNumOperands() == 4 && "unexpected # of operands");
    MachineOperand& op1 = useInst.getOperand(1);
    MachineOperand& op2 = useInst.getOperand(2);
    MachineOperand& op3 = useInst.getOperand(3);
    assert(op1.isImm() && op2.isImm() && op3.isReg() && op3.isUse()
           && "unexpected op");
    if (op1.getImm() == imm1 && op2.getImm() == imm2 && op3.getReg() == reg) {
      return &useInst;
    }
  }
  return NULL;
}
-
// returns if the given MachineInstr defines exactly 1 register operand
static inline bool hasSingleRegDef(MachineInstr& inst)
{
@@ -401,7 +490,7 @@
end = inst.operands_end();
it != end; ++it) {
MachineOperand& op = *it;
- if (!op.isDef() || !op.isReg()) {
+ if (!op.isReg() || !op.isDef()) {
continue;
}
++nDefs;
@@ -411,7 +500,6 @@
}
return nDefs == 1;
}
-
// returns the first register this MachineInstr defines
static inline uint32_t firstDefReg(MachineInstr& inst)
{
@@ -426,165 +514,248 @@
assert(0 && "should not reach");
return 0;
}
-
// Find sign extension sequence such as the following:
// reg1 = IL_ASINTi8r reg
// reg2 = SHLi32rr reg1, 24
// reg3 = SHRi32rr reg2, 24
// reg4 = IL_ASCHARi32r reg3
// or zero extension sequence such as the following:
// reg1 = IL_ASINTi8r reg
// reg2 = ANDi32rr reg1, 0xff
// reg3 = IL_ASCHARi32r reg2
// The above sequence does sign/zero-extension to reg if reg is of type i8
// Return the extension sequence through "extendSeq".
// Return true if extension sequence is found, return false otherwise.
// On failure "extendSeq" is left cleared. Each matched instruction must have
// a single register def so the chain can be followed unambiguously.
bool AMDILMachinePeephole::findExtendSequence(MachineOperand& op,
                                              bool isSigned,
                                              MachineInstVec& extendSeq)
{
  unsigned opReg = op.getReg();
  uint32_t regClassID = MFP->getRegInfo().getRegClass(opReg)->getID();
  // Query the expected opcodes/constants for this register class; the
  // shape of the sequence depends on the device generation (see
  // getExtendOpcodes).
  uint32_t nConsts;
  int64_t constVal[2];
  int promoteOp;
  int demoteOp;
  int extendOps[2];
  uint32_t intRegClassID;
  getExtendOpcodes(regClassID, isSigned, nConsts, constVal, promoteOp, demoteOp,
                   extendOps, intRegClassID);
  const MachineRegisterInfo& MRI = MFP->getRegInfo();
  extendSeq.clear();
  // Step 1: find the promote (as-int) instruction that consumes opReg.
  MachineInstr* promoteInst = findRegUse(promoteOp, opReg, MRI);
  if (promoteInst == NULL || !hasSingleRegDef(*promoteInst)) return false;
  extendSeq.push_back(promoteInst);
  uint32_t reg1 = firstDefReg(*promoteInst);
  // Step 2: follow up to two extend ops (SHL/SHR pair, AND, or a single
  // BIT_EXTRACT), matching the constant layout reported by getExtendOpcodes.
  for (int i = 0; i < 2; ++i) {
    if (extendOps[i] == AMDIL::INSTRUCTION_LIST_END) break;
    MachineInstr* extendInst;
    if (nConsts == 1) {
      // (reg, imm) operand order.
      extendInst = findRegUse(extendOps[i], reg1, constVal[0], MRI);
    } else {
      // (imm, imm, reg) operand order (BIT_EXTRACT form).
      extendInst = findRegUse(extendOps[i], constVal[0], constVal[1], reg1,
                              MRI);
    }
    if (extendInst == NULL || !hasSingleRegDef(*extendInst)) {
      extendSeq.clear();
      return false;
    }
    extendSeq.push_back(extendInst);
    reg1 = firstDefReg(*extendInst);
  }
  // Step 3: find the demote (as-char/short) instruction closing the chain.
  MachineInstr* demoteInst = findRegUse(demoteOp, reg1, MRI);
  if (demoteInst == NULL || !hasSingleRegDef(*demoteInst)) {
    extendSeq.clear();
    return false;
  }
  extendSeq.push_back(demoteInst);
  return true;
}
-
// returns opcodes to be used to sign/zero extend the given register class
//
// Outputs:
//   nConsts/constVal - number and values of immediate operands fed to the
//                      extend ops (1 for SHL/SHR/AND shift-or-mask form,
//                      2 for BIT_EXTRACT width/offset form)
//   promoteOp        - opcode converting the narrow class to i32 width
//   demoteOp         - opcode converting back to the narrow class
//   extendOps[2]     - up to two extend opcodes; unused slots are set to
//                      AMDIL::INSTRUCTION_LIST_END
//   intRegClassID    - the i32-width register class used in between
//
// NOTE(review): on the (asserted) default case the outputs other than
// extendOps are left unassigned in release builds — callers rely on
// regClassID being one of the handled classes.
void
AMDILMachinePeephole::getExtendOpcodes(uint32_t regClassID,
                                       bool isSigned,
                                       uint32_t& nConsts,
                                       int64_t* constVal,
                                       int& promoteOp,
                                       int& demoteOp,
                                       int* extendOps,
                                       uint32_t& intRegClassID)
{
  const AMDILSubtarget *STM = &TM->getSubtarget<AMDILSubtarget>();
  // Switch 1: pick the immediate constants.  Pre-HD5XXX devices use the
  // classic shift-by-24/16 (signed) or mask 0xFF/0xFFFF (unsigned);
  // newer devices use BIT_EXTRACT with (width, offset) immediates.
  switch(regClassID)
  {
  default:
    assert(0 && "unexpected reg class");
    break;
  case AMDIL::GPRI8RegClassID:
  case AMDIL::GPRV2I8RegClassID:
  case AMDIL::GPRV4I8RegClassID:
    if (STM->device()->getGeneration() <= AMDILDeviceInfo::HD4XXX) {
      constVal[0] = isSigned ? 24 : 0xFF;
      nConsts = 1;
    } else {
      constVal[0] = 8;
      constVal[1] = 0;
      nConsts = 2;
    }
    break;
  case AMDIL::GPRI16RegClassID:
  case AMDIL::GPRV2I16RegClassID:
  case AMDIL::GPRV4I16RegClassID:
    if (STM->device()->getGeneration() <= AMDILDeviceInfo::HD4XXX) {
      constVal[0] = isSigned ? 16 : 0xFFFF;
      nConsts = 1;
    } else {
      constVal[0] = 16;
      constVal[1] = 0;
      nConsts = 2;
    }
    break;
  }

  // Switch 2: pick the extend opcodes (keyed by vector width) and the
  // matching i32-width register class.  Old devices need SHL+SHR for
  // signed or a single AND for unsigned; new devices use one
  // IBIT/UBIT_EXTRACT.  Unused slots stay INSTRUCTION_LIST_END.
  extendOps[0] = AMDIL::INSTRUCTION_LIST_END;
  extendOps[1] = AMDIL::INSTRUCTION_LIST_END;
  switch(regClassID)
  {
  default:
    break;
  case AMDIL::GPRI8RegClassID:
  case AMDIL::GPRI16RegClassID:
    intRegClassID = AMDIL::GPRI32RegClassID;
    if (STM->device()->getGeneration() <= AMDILDeviceInfo::HD4XXX) {
      extendOps[0] = isSigned ? AMDIL::SHLi32i32rr : AMDIL::ANDi32rr;
      if (isSigned) extendOps[1] = AMDIL::SHRi32i32rr;
    } else {
      extendOps[0] = isSigned ? AMDIL::IBIT_EXTRACTi32iir
                              : AMDIL::UBIT_EXTRACTi32iir;
    }
    break;
  case AMDIL::GPRV2I8RegClassID:
  case AMDIL::GPRV2I16RegClassID:
    intRegClassID = AMDIL::GPRV2I32RegClassID;
    if (STM->device()->getGeneration() <= AMDILDeviceInfo::HD4XXX) {
      extendOps[0] = isSigned ? AMDIL::SHLv2i32i32rr : AMDIL::ANDv2i32rr;
      if (isSigned) extendOps[1] = AMDIL::SHRv2i32i32rr;
    } else {
      extendOps[0] = isSigned ? AMDIL::IBIT_EXTRACTv2i32iir :
                                AMDIL::UBIT_EXTRACTv2i32iir;
    }
    break;
  case AMDIL::GPRV4I8RegClassID:
  case AMDIL::GPRV4I16RegClassID:
    intRegClassID = AMDIL::GPRV4I32RegClassID;
    if (STM->device()->getGeneration() <= AMDILDeviceInfo::HD4XXX) {
      extendOps[0] = isSigned ? AMDIL::SHLv4i32i32rr : AMDIL::ANDv4i32rr;
      if (isSigned) extendOps[1] = AMDIL::SHRv4i32i32rr;
    } else {
      extendOps[0] = isSigned ? AMDIL::IBIT_EXTRACTv4i32iir :
                                AMDIL::UBIT_EXTRACTv4i32iir;
    }
    break;
  }

  // Switch 3: pick the promote/demote (bitcast-style) opcodes, keyed by the
  // exact element type and vector width.
  switch(regClassID)
  {
  default:
    break;
  case AMDIL::GPRI8RegClassID:
    promoteOp = AMDIL::IL_ASINTi8r;
    demoteOp = AMDIL::IL_ASCHARi32r;
    break;
  case AMDIL::GPRV2I8RegClassID:
    promoteOp = AMDIL::IL_ASV2INTv2i8r;
    demoteOp = AMDIL::IL_ASV2CHARv2i32r;
    break;
  case AMDIL::GPRV4I8RegClassID:
    promoteOp = AMDIL::IL_ASV4INTv4i8r;
    demoteOp = AMDIL::IL_ASV4CHARv4i32r;
    break;
  case AMDIL::GPRI16RegClassID:
    promoteOp = AMDIL::IL_ASINTi16r;
    demoteOp = AMDIL::IL_ASSHORTi32r;
    break;
  case AMDIL::GPRV2I16RegClassID:
    promoteOp = AMDIL::IL_ASV2INTv2i16r;
    demoteOp = AMDIL::IL_ASV2SHORTv2i32r;
    break;
  case AMDIL::GPRV4I16RegClassID:
    promoteOp = AMDIL::IL_ASV4INTv4i16r;
    demoteOp = AMDIL::IL_ASV4SHORTv4i32r;
    break;
  }
}
-
// create sequence of instructions to sign/zero extend the given register
//
// Builds promote -> [extend op(s)] -> demote at (BB, I), records each created
// instruction into "extendSeq" (cleared first), and returns the register
// that holds the extended value.
//
// Fixes vs. the original:
//  - "constReg" was created with genVReg() but never used (leftover from the
//    removed LOADCONST form); the dead vreg is no longer generated.
//  - "intReg2" was declared uninitialized at function scope and the demote
//    instruction read it even when the extend-op loop executed zero
//    iterations (extendOps[0] == INSTRUCTION_LIST_END), an uninitialized
//    read.  "intReg" always holds the last produced value (the promote
//    result, or the last extend result), so the demote now reads "intReg"
//    and "intReg2" is scoped to the loop.
uint32_t
AMDILMachinePeephole::addExtendInstructions(MachineBasicBlock& BB,
                                            MachineBasicBlock::instr_iterator I,
                                            uint32_t reg,
                                            bool isSigned,
                                            MachineInstVec& extendSeq)
{
  uint32_t nConsts;
  int64_t constVal[2];
  int promoteOp;
  int demoteOp;
  int extendOps[2];
  uint32_t intRegClassID;
  uint32_t regClassID = MFP->getRegInfo().getRegClass(reg)->getID();
  getExtendOpcodes(regClassID, isSigned, nConsts, constVal, promoteOp, demoteOp,
                   extendOps, intRegClassID);
  uint32_t intReg = genVReg(intRegClassID);
  uint32_t dstReg = genVReg(regClassID);
  // Promote the narrow value to i32 width.
  MachineInstrBuilder promoteInst
    = BuildMI(BB, I, (*I).getDebugLoc(),
              TM->getInstrInfo()->get(promoteOp), intReg).addReg(reg);
  extendSeq.clear();
  extendSeq.push_back(promoteInst);
  // Emit up to two extend ops; BIT_EXTRACT opcodes take (imm, imm, reg),
  // the SHL/SHR/AND forms take (reg, imm).
  for (int i = 0; i < 2; ++i) {
    if (extendOps[i] == AMDIL::INSTRUCTION_LIST_END) break;
    uint32_t intReg2 = genVReg(intRegClassID);
    MachineInstrBuilder extendInst
      = BuildMI(BB, I, (*I).getDebugLoc(),
                TM->getInstrInfo()->get(extendOps[i]), intReg2);
    assert((nConsts == 1 || nConsts == 2) && "unexpected instruction type");
    // NOTE(review): this relies on the IBIT/UBIT_EXTRACT opcodes being
    // contiguous in the generated opcode enum.
    if ((extendOps[i] >= AMDIL::IBIT_EXTRACTi16iir
         && extendOps[i] <= AMDIL::IBIT_EXTRACTv4i8iir)
        || (extendOps[i] >= AMDIL::UBIT_EXTRACTi16iir
            && extendOps[i] <= AMDIL::UBIT_EXTRACTv4i8iir)) {
      assert(nConsts == 2 && "unexpected instruction type");
      extendInst.addImm(constVal[0]);
      extendInst.addImm(constVal[1]);
      extendInst.addReg(intReg);
    } else {
      assert(nConsts == 1 && "unexpected instruction type");
      extendInst.addReg(intReg);
      extendInst.addImm(constVal[0]);
    }
    extendSeq.push_back(extendInst);
    intReg = intReg2; // chain the next op off this result
  }
  // Demote the (possibly extended) i32-width value back to the narrow class.
  // "intReg" is the last value produced above, whether or not the loop ran.
  MachineInstrBuilder demoteInst
    = BuildMI(BB, I, (*I).getDebugLoc(),
              TM->getInstrInfo()->get(demoteOp), dstReg).addReg(intReg);
  extendSeq.push_back(demoteInst);
  return dstReg;
}
-
+// move sequence of instructions to "BB" at "I"
+void AMDILMachinePeephole::moveInsts(MachineInstVec& insts,
+ MachineBasicBlock& BB,
+ MachineBasicBlock::instr_iterator I)
+{
+ for (MachineInstVec::iterator i = insts.begin(), e = insts.end();
+ i != e; ++i) {
+ MachineInstr* inst = *i;
+ DEBUG(dbgs() << "moving " << *inst << " from B#"
+ << inst->getParent()->getNumber() << " to B#"
+ << BB.getNumber() << "\n");
+ assert(DomTree->dominates(&BB, inst->getParent())
+ && "def not dominate use");
+ inst->removeFromParent();
+ BB.insert(I, inst);
+ }
+}
// sign/zero extend an operand of a MachineInstr by either reuse an existing
// sequence of sign/zero extension of the operand or by creating a new sequence.
void
@@ -594,11 +765,28 @@
{
MachineInstr* mi = MIB;
DEBUG(dbgs() << (isSigned ? "sign" : "zero") << " extending operand "
- << opIdx << " for " << *mi);
+ << opIdx << " for " << *mi);
MachineOperand& op = mi->getOperand(opIdx);
- assert(op.isReg() && op.isUse() && "extending non-register or def operand");
+ if (!op.isReg() || !op.isUse()) return;
uint32_t opReg = op.getReg();
- uint32_t newOpReg;
+ const MachineRegisterInfo& MRI = MFP->getRegInfo();
+ assert(MRI.isSSA() && "not SSA");
+ MachineBasicBlock* insertBB = NULL;
+ MachineBasicBlock::instr_iterator insertItr;
+ if (MRI.def_empty(opReg)) {
+ // if opReg is live-in, insert extension instructions in the entry BB
+ insertBB = &MFP->front();
+ insertItr = insertBB->instr_begin();
+ } else {
+ MachineInstr& opInst = *MRI.def_begin(opReg);
+ insertBB = opInst.getParent();
+ insertItr = MachineBasicBlock::instr_iterator(&opInst);
+ ++insertItr;
+ }
+ // should insert the extension instructions after all PHIs in the block
+ while (insertItr != insertBB->instr_end() && insertItr->isPHI()) {
+ ++insertItr;
+ }
assert((unsigned)MFP->getRegInfo().getRegClass(opReg)->getID()
== (unsigned)mi->getDesc().OpInfo[opIdx].RegClass
@@ -606,27 +794,40 @@
// first check the sext/zext map to see if it already has a sign/zero
// extension, if so, reuse it
- Reg2RegMap& map = isSigned ? sextMap : zextMap;
- Reg2RegMap::iterator it = map.find(opReg);
+ MachineInstVec* extendSeq = NULL;
+ Reg2InstsMap& map = isSigned ? sextMap : zextMap;
+ Reg2InstsMap::iterator it = map.find(opReg);
if (it != map.end()) {
DEBUG(dbgs() << "Found in map ");
- newOpReg = it->second;
+ extendSeq = it->second;
+ // if the extend sequence found does not dominate current instruction,
+ // move the sequence to the operand's block
+ if (!DomTree->dominates(extendSeq->back()->getParent(), mi->getParent())) {
+ moveInsts(*extendSeq, *insertBB, insertItr);
+ }
} else {
+ extendSeq = new MachineInstVec();
// not in the map. See if we can find in the DFG
- MachineInstr* extendInst = findExtendInstruction(op, isSigned);
- if (extendInst && hasSingleRegDef(*extendInst)) {
- newOpReg = firstDefReg(*extendInst);
+ bool found = findExtendSequence(op, isSigned, *extendSeq);
+ if (found) {
DEBUG(dbgs() << "Found in DFG ");
+ // if the extend sequence found does not dominate current instruction,
+ // move the sequence to the operand's block
+ if (!DomTree->dominates(extendSeq->back()->getParent(),
+ mi->getParent())) {
+ moveInsts(*extendSeq, *insertBB, insertItr);
+ }
} else {
// not in the DFG either. Create sign/zero extension.
- newOpReg = addExtendInstruction(MIB, opReg, isSigned);
+ addExtendInstructions(*insertBB, insertItr, opReg, isSigned, *extendSeq);
DEBUG(dbgs() << "Created ");
}
- map[opReg] = newOpReg;
+ map[opReg] = extendSeq;
}
+ assert(!extendSeq->empty() && "sanity");
+ uint32_t newOpReg = firstDefReg(*extendSeq->back());
DEBUG(dbgs() << (isSigned ? "sign" : "zero") << " extension vreg"
- << TargetRegisterInfo::virtReg2Index(newOpReg) << " for vreg"
- << TargetRegisterInfo::virtReg2Index(opReg) << "\n");
+ << TargetRegisterInfo::virtReg2Index(newOpReg) << " for vreg"
+ << TargetRegisterInfo::virtReg2Index(opReg) << "\n");
op.setReg(newOpReg);
}
-
More information about the llvm-branch-commits
mailing list