[llvm] r322402 - AMDGPU/SI: Add d16 support for buffer intrinsics.
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 12 13:12:19 PST 2018
Author: chfang
Date: Fri Jan 12 13:12:19 2018
New Revision: 322402
URL: http://llvm.org/viewvc/llvm-project?rev=322402&view=rev
Log:
AMDGPU/SI: Add d16 support for buffer intrinsics.
Differential Revision:
https://reviews.llvm.org/D38906
Reviewers:
Matt and Brian.
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-packed.s
llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-unpacked.s
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Jan 12 13:12:19 2018
@@ -292,6 +292,12 @@ def FeatureIntClamp : SubtargetFeature<"
"Support clamp for integer destination"
>;
+def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
+ "HasUnpackedD16VMem",
+ "true",
+ "Has unpacked d16 vmem instructions"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -547,23 +553,27 @@ def FeatureISAVersion7_0_4 : SubtargetFe
def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
- FeatureSGPRInitBug]>;
+ FeatureSGPRInitBug,
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
[FeatureVolcanicIslands,
FeatureFastFMAF32,
HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureXNACK]>;
+ FeatureXNACK,
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
[FeatureVolcanicIslands,
FeatureLDSBankCount32,
- FeatureSGPRInitBug]>;
+ FeatureSGPRInitBug,
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
[FeatureVolcanicIslands,
- FeatureLDSBankCount32]>;
+ FeatureLDSBankCount32,
+ FeatureUnpackedD16VMem]>;
def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
[FeatureVolcanicIslands,
@@ -715,6 +725,11 @@ def HasFlatScratchInsts : Predicate<"Sub
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<"FeatureGFX9Insts">;
+def HasUnpackedD16VMem : Predicate<"Subtarget->hasUnpackedD16VMem()">,
+ AssemblerPredicate<"FeatureUnpackedD16VMem">;
+def HasPackedD16VMem : Predicate<"!Subtarget->hasUnpackedD16VMem()">,
+ AssemblerPredicate<"!FeatureUnpackedD16VMem">;
+
def LDSRequiresM0Init : Predicate<"Subtarget->ldsRequiresM0Init()">;
def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Jan 12 13:12:19 2018
@@ -3976,14 +3976,18 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT_X3)
+ NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
+ NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)
NODE_NAME_CASE(BUFFER_LOAD)
NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
+ NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(BUFFER_STORE)
NODE_NAME_CASE(BUFFER_STORE_FORMAT)
+ NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Jan 12 13:12:19 2018
@@ -451,14 +451,18 @@ enum NodeType : unsigned {
LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,
TBUFFER_STORE_FORMAT_X3,
+ TBUFFER_STORE_FORMAT_D16,
TBUFFER_LOAD_FORMAT,
+ TBUFFER_LOAD_FORMAT_D16,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
BUFFER_LOAD,
BUFFER_LOAD_FORMAT,
+ BUFFER_LOAD_FORMAT_D16,
BUFFER_STORE,
BUFFER_STORE_FORMAT,
+ BUFFER_STORE_FORMAT_D16,
BUFFER_ATOMIC_SWAP,
BUFFER_ATOMIC_ADD,
BUFFER_ATOMIC_SUB,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Fri Jan 12 13:12:19 2018
@@ -61,7 +61,8 @@ enum SIEncodingFamily {
VI = 1,
SDWA = 2,
SDWA9 = 3,
- GFX9 = 4
+ GFX80 = 4,
+ GFX9 = 5
};
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Jan 12 13:12:19 2018
@@ -162,6 +162,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
FlatGlobalInsts(false),
FlatScratchInsts(false),
AddNoCarryInsts(false),
+ HasUnpackedD16VMem(false),
R600ALUInst(false),
CaymanISA(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jan 12 13:12:19 2018
@@ -165,6 +165,7 @@ protected:
bool FlatGlobalInsts;
bool FlatScratchInsts;
bool AddNoCarryInsts;
+ bool HasUnpackedD16VMem;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
@@ -481,6 +482,10 @@ public:
return AddNoCarryInsts;
}
+ bool hasUnpackedD16VMem() const {
+ return HasUnpackedD16VMem;
+ }
+
bool isMesaKernel(const MachineFunction &MF) const {
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction().getCallingConv());
}
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Fri Jan 12 13:12:19 2018
@@ -671,6 +671,61 @@ defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pse
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores <
"buffer_store_format_xyzw", VReg_128
>;
+
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_x", VGPR_32
+ >;
+ defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_xy", VReg_64
+ >;
+ defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_xyz", VReg_96
+ >;
+ defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_xyzw", VReg_128
+ >;
+ defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_x", VGPR_32
+ >;
+ defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_xy", VReg_64
+ >;
+ defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_xyz", VReg_96
+ >;
+ defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_xyzw", VReg_128
+ >;
+} // End HasUnpackedD16VMem.
+
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_x", VGPR_32
+ >;
+ defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_xy", VGPR_32
+ >;
+ defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_xyz", VReg_64
+ >;
+ defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Pseudo_Loads <
+ "buffer_load_format_d16_xyzw", VReg_64
+ >;
+ defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_x", VGPR_32
+ >;
+ defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_xy", VGPR_32
+ >;
+ defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_xyz", VReg_64
+ >;
+ defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Pseudo_Stores <
+ "buffer_store_format_d16_xyzw", VReg_64
+ >;
+} // End HasPackedD16VMem.
+
defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads <
"buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8
>;
@@ -860,6 +915,28 @@ defm TBUFFER_STORE_FORMAT_XY : MTBUF_P
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>;
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
+ defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>;
+ defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
+ defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
+} // End HasUnpackedD16VMem.
+
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
+ defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>;
+ defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
+ defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>;
+} // End HasPackedD16VMem.
+
let SubtargetPredicate = isCIVI in {
//===----------------------------------------------------------------------===//
@@ -922,6 +999,20 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatt
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
+
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f16, "BUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XY_gfx80">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4i32, "BUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
+} // End HasUnpackedD16VMem.
+
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f16, "BUFFER_LOAD_FORMAT_D16_X">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XYZW">;
+} // End HasPackedD16VMem.
+
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
@@ -969,6 +1060,20 @@ multiclass MUBUF_StoreIntrinsicPat<SDPat
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
+
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XY_gfx80">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4i32, "BUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
+} // End HasUnpackedD16VMem.
+
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_XY">;
+ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XYZW">;
+} // End HasPackedD16VMem.
+
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
@@ -1382,6 +1487,19 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f32, "TBUFFER_LOAD_FORMAT_XY">;
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">;
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
+} // End HasUnpackedD16VMem.
+
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_XY">;
+ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
+} // End HasPackedD16VMem.
+
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
@@ -1431,6 +1549,19 @@ defm : MTBUF_StoreIntrinsicPat<SItbuffer
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_x3, v4f32, "TBUFFER_STORE_FORMAT_XYZ">;
defm : MTBUF_StoreIntrinsicPat<SItbuffer_store, v4f32, "TBUFFER_STORE_FORMAT_XYZW">;
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XY_gfx80">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4i32, "TBUFFER_STORE_FORMAT_D16_XYZW_gfx80">;
+} // End HasUnpackedD16VMem.
+
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_XY">;
+ defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XYZW">;
+} // End HasPackedD16VMem.
+
//===----------------------------------------------------------------------===//
// Target instructions, move to the appropriate target TD file
//===----------------------------------------------------------------------===//
@@ -1628,6 +1759,35 @@ multiclass MUBUF_Real_AllAddr_vi<bits<7>
def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
}
+class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> : // 64-bit encoding of MUBUF pseudo `ps` for the GFX80 encoding family.
+ MUBUF_Real<op, ps>,
+ Enc64,
+ SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
+ let AssemblerPredicate=HasUnpackedD16VMem; // Only assembled for subtargets with unpacked D16 vmem.
+ let DecoderNamespace="GFX80_UNPACKED"; // NOTE(review): presumably separate because the packed D16 defs reuse opcodes 0x08-0x0f — confirm.
+
+ let Inst{11-0} = !if(ps.has_offset, offset, ?); // '?' leaves the bits unset when the pseudo lacks the operand.
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); // Falls back to the pseudo's fixed glc value.
+ let Inst{16} = lds;
+ let Inst{17} = !if(ps.has_slc, slc, ?);
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); // Only bits 6-2 of srsrc are encoded.
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
+}
+
+multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> { // One MUBUF_Real_gfx80 def per addressing-mode pseudo (OFFSET/OFFEN/IDXEN/BOTHEN).
+ def _OFFSET_vi : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; // NOTE(review): "_vi" suffix here vs "_gfx80" in MTBUF_Real_AllAddr_gfx80 — confirm intended.
+ def _OFFEN_vi : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_vi : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_vi : MUBUF_Real_gfx80 <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+
multiclass MUBUF_Real_Atomic_vi<bits<7> op> :
MUBUF_Real_AllAddr_vi<op> {
def _OFFSET_RTN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
@@ -1644,6 +1804,26 @@ defm BUFFER_STORE_FORMAT_X : MUBUF_
defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x05>;
defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x06>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x07>;
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x08>;
+ defm BUFFER_LOAD_FORMAT_D16_XY_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x09>;
+ defm BUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0a>;
+ defm BUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0b>;
+ defm BUFFER_STORE_FORMAT_D16_X_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0c>;
+ defm BUFFER_STORE_FORMAT_D16_XY_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0d>;
+ defm BUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0e>;
+ defm BUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MUBUF_Real_AllAddr_gfx80 <0x0f>;
+} // End HasUnpackedD16VMem.
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_vi <0x08>;
+ defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_vi <0x09>;
+ defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0a>;
+ defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0b>;
+ defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_vi <0x0c>;
+ defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_vi <0x0d>;
+ defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_vi <0x0e>;
+ defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_vi <0x0f>;
+} // End HasPackedD16VMem.
defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_vi <0x10>;
defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_vi <0x11>;
defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_vi <0x12>;
@@ -1729,11 +1909,61 @@ multiclass MTBUF_Real_AllAddr_vi<bits<4>
def _BOTHEN_vi : MTBUF_Real_vi <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
}
-defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0>;
-defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <1>;
-//defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <2>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <3>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <4>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <5>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <6>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <7>;
+class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> : // 64-bit encoding of MTBUF pseudo `ps` for the GFX80 encoding family.
+ MTBUF_Real<ps>,
+ Enc64,
+ SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
+ let AssemblerPredicate=HasUnpackedD16VMem; // Only assembled for subtargets with unpacked D16 vmem.
+ let DecoderNamespace="GFX80_UNPACKED"; // NOTE(review): presumably separate because the packed D16 defs reuse opcodes 0x08-0x0f — confirm.
+
+ let Inst{11-0} = !if(ps.has_offset, offset, ?); // '?' leaves the bits unset when the pseudo lacks the operand.
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value); // Falls back to the pseudo's fixed glc value.
+ let Inst{18-15} = op;
+ let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value); // Data format operand, or the pseudo's fixed value.
+ let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value); // Numeric format operand, or the pseudo's fixed value.
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); // Only bits 6-2 of srsrc are encoded.
+ let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
+}
+
+multiclass MTBUF_Real_AllAddr_gfx80<bits<4> op> { // One MTBUF_Real_gfx80 def per addressing-mode pseudo (OFFSET/OFFEN/IDXEN/BOTHEN).
+ def _OFFSET_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; // NAME#"_OFFSET" looks up the matching pseudo by name.
+ def _OFFEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_gfx80 : MTBUF_Real_gfx80 <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+
+defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_vi <0x00>;
+defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_vi <0x01>;
+defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <0x02>;
+defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x03>;
+defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_vi <0x04>;
+defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_vi <0x05>;
+defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_vi <0x06>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_vi <0x07>;
+let SubtargetPredicate = HasUnpackedD16VMem in {
+ defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x08>;
+ defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x09>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0a>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0b>;
+ defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0c>;
+ defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0d>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0e>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Real_AllAddr_gfx80 <0x0f>;
+} // End HasUnpackedD16VMem.
+let SubtargetPredicate = HasPackedD16VMem in {
+ defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_vi <0x08>;
+ defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_vi <0x09>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0a>;
+ defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0b>;
+ defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_vi <0x0c>;
+ defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_vi <0x0d>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_vi <0x0e>;
+ defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_vi <0x0f>;
+} // End HasPackedD16VMem.
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jan 12 13:12:19 2018
@@ -207,11 +207,14 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::v4f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v4f16, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -3501,6 +3504,78 @@ SDValue SITargetLowering::LowerOperation
return SDValue();
}
+// Convert Result, the raw value of a D16 buffer load, back to LoadVT. Unpacked
+// results (v2i32/v4i32) are truncated first; packed ones are only bitcast.
+static SDValue adjustLoadValueType(SDValue Result, EVT LoadVT, const SDLoc &DL,
+ SelectionDAG &DAG, bool Unpacked) {
+ if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
+ EVT IntLoadVT = LoadVT.changeTypeToInteger(); // v2i16/v4i16.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, IntLoadVT, Result);
+ return DAG.getNode(ISD::BITCAST, DL, LoadVT, Trunc);
+ }
+ // Cast back to the original packed type.
+ return DAG.getNode(ISD::BITCAST, DL, LoadVT, Result);
+}
+
+// Lowers INTRINSIC_W_CHAIN with illegal result types (v2f16/v4f16 D16 loads).
+SDValue SITargetLowering::lowerIntrinsicWChain_IllegalReturnType(SDValue Op,
+ SDValue &Chain, SelectionDAG &DAG) const { // Chain is an output, set on success.
+ EVT LoadVT = Op.getValueType();
+ // TODO: handle v3f16.
+ if (LoadVT != MVT::v2f16 && LoadVT != MVT::v4f16)
+ return SDValue(); // Empty value: nothing was lowered.
+
+ bool Unpacked = Subtarget->hasUnpackedD16VMem();
+ EVT UnpackedLoadVT = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32; // One i32 per element.
+ EVT EquivLoadVT = Unpacked ? UnpackedLoadVT :
+ getEquivalentMemType(*DAG.getContext(), LoadVT);
+ // The replacement node yields EquivLoadVT plus a chain instead of LoadVT.
+ SDVTList VTList = DAG.getVTList(EquivLoadVT, MVT::Other);
+
+ SDValue Res;
+ SDLoc DL(Op);
+ MemSDNode *M = cast<MemSDNode>(Op); // Supplies the memory VT and operand reused below.
+ unsigned IID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); // Operand 1 holds the intrinsic ID.
+ switch (IID) {
+ case Intrinsic::amdgcn_tbuffer_load: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // voffset
+ Op.getOperand(5), // soffset
+ Op.getOperand(6), // offset
+ Op.getOperand(7), // dfmt
+ Op.getOperand(8), // nfmt
+ Op.getOperand(9), // glc
+ Op.getOperand(10) // slc
+ };
+ Res = DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16, DL,
+ VTList, Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ Chain = Res.getValue(1); // Result 1 is the chain (MVT::Other in VTList).
+ return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
+ }
+ case Intrinsic::amdgcn_buffer_load_format: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // offset
+ Op.getOperand(5), // glc
+ Op.getOperand(6) // slc
+ };
+ Res = DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
+ DL, VTList, Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ Chain = Res.getValue(1); // Result 1 is the chain (MVT::Other in VTList).
+ return adjustLoadValueType(Res, LoadVT, DL, DAG, Unpacked);
+ }
+ default:
+ return SDValue(); // Any other intrinsic is left for the caller to handle.
+ }
+}
+
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -3528,6 +3603,16 @@ void SITargetLowering::ReplaceNodeResult
}
break;
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ SDValue Chain;
+ if (SDValue Res = lowerIntrinsicWChain_IllegalReturnType(SDValue(N, 0),
+ Chain, DAG)) {
+ Results.push_back(Res);
+ Results.push_back(Chain);
+ return;
+ }
+ break;
+ }
case ISD::SELECT: {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -4656,6 +4741,31 @@ SDValue SITargetLowering::LowerINTRINSIC
}
}
+SDValue SITargetLowering::handleD16VData(SDValue VData, // D16 data about to be stored.
+ SelectionDAG &DAG) const { // Returns VData, possibly bitcast or widened.
+ EVT StoreVT = VData.getValueType();
+ SDLoc DL(VData);
+
+ if (StoreVT.isVector()) {
+ assert ((StoreVT.getVectorNumElements() != 3) && "Handle v3f16"); // 3-element vectors are not handled yet.
+ if (!Subtarget->hasUnpackedD16VMem()) {
+ if (!isTypeLegal(StoreVT)) {
+ // The target stores packed data, so only the illegal type needs
+ // working around: bitcast to an equivalent type.
+ EVT EquivStoreVT = getEquivalentMemType(*DAG.getContext(), StoreVT);
+ return DAG.getNode(ISD::BITCAST, DL, EquivStoreVT, VData);
+ }
+ } else { // Unpacked target: bitcast to integers, then zero-extend to v2i32/v4i32.
+ EVT IntStoreVT = StoreVT.changeTypeToInteger();
+ SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
+ EVT EquivStoreVT = (StoreVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
+ }
+ }
+ // Scalar data and legal packed vector types are returned unchanged.
+ return VData;
+}
+
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -4798,9 +4908,13 @@ SDValue SITargetLowering::LowerINTRINSIC
}
case Intrinsic::amdgcn_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
SDValue Ops[] = {
Chain,
- Op.getOperand(2), // vdata
+ VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Op.getOperand(5), // voffset
@@ -4811,37 +4925,34 @@ SDValue SITargetLowering::LowerINTRINSIC
Op.getOperand(10), // glc
Op.getOperand(11) // slc
};
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore,
- VT.getStoreSize(), 4);
- return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
SDValue Ops[] = {
Chain,
- Op.getOperand(2), // vdata
+ VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Op.getOperand(5), // offset
Op.getOperand(6), // glc
Op.getOperand(7) // slc
};
- EVT VT = Op.getOperand(3).getValueType();
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOStore |
- MachineMemOperand::MODereferenceable,
- VT.getStoreSize(), 4);
-
- unsigned Opcode = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
- AMDGPUISD::BUFFER_STORE :
- AMDGPUISD::BUFFER_STORE_FORMAT;
- return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
+ unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
+ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
}
default:
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Fri Jan 12 13:12:19 2018
@@ -60,6 +60,10 @@ class SITargetLowering final : public AM
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerIntrinsicWChain_IllegalReturnType(SDValue Op, SDValue &Chain,
+ SelectionDAG &DAG) const;
+ SDValue handleD16VData(SDValue VData, SelectionDAG &DAG) const;
+
/// \brief Converts \p Op, which must be of floating point type, to the
/// floating point type \p VT, by either extending or truncating it.
SDValue getFPExtOrFPTrunc(SelectionDAG &DAG,
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=322402&r1=322401&r2=322402&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Fri Jan 12 13:12:19 2018
@@ -25,7 +25,8 @@ def SIEncodingFamily {
int VI = 1;
int SDWA = 2;
int SDWA9 = 3;
- int GFX9 = 4;
+ int GFX80 = 4;
+ int GFX9 = 5;
}
//===----------------------------------------------------------------------===//
@@ -45,21 +46,24 @@ def SIatomic_dec : SDNode<"AMDGPUISD::AT
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
-def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT",
- SDTypeProfile<1, 9,
- [ // vdata
- SDTCisVT<1, v4i32>, // rsrc
- SDTCisVT<2, i32>, // vindex(VGPR)
- SDTCisVT<3, i32>, // voffset(VGPR)
- SDTCisVT<4, i32>, // soffset(SGPR)
- SDTCisVT<5, i32>, // offset(imm)
- SDTCisVT<6, i32>, // dfmt(imm)
- SDTCisVT<7, i32>, // nfmt(imm)
- SDTCisVT<8, i32>, // glc(imm)
- SDTCisVT<9, i32> // slc(imm)
- ]>,
- [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
->;
+def SDTbuffer_load : SDTypeProfile<1, 9,
+ [ // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex(VGPR)
+ SDTCisVT<3, i32>, // voffset(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // offset(imm)
+ SDTCisVT<6, i32>, // dfmt(imm)
+ SDTCisVT<7, i32>, // nfmt(imm)
+ SDTCisVT<8, i32>, // glc(imm)
+ SDTCisVT<9, i32> // slc(imm)
+ ]>;
+
+def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTbuffer_load,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
+def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
+ SDTbuffer_load,
+ [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SDTtbuffer_store : SDTypeProfile<0, 10,
[ // vdata
@@ -79,6 +83,9 @@ def SItbuffer_store : SDNode<"AMDGPUISD:
def SItbuffer_store_x3 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_X3",
SDTtbuffer_store,
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
+ SDTtbuffer_store,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SDTBufferLoad : SDTypeProfile<1, 5,
[ // vdata
@@ -92,6 +99,9 @@ def SIbuffer_load : SDNode <"AMDGPUISD::
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
+ SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SDTBufferStore : SDTypeProfile<0, 6,
[ // vdata
@@ -102,9 +112,13 @@ def SDTBufferStore : SDTypeProfile<0, 6,
SDTCisVT<5, i1>]>; // slc
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
- [SDNPMemOperand, SDNPHasChain, SDNPMayStore]>;
-def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore,
- [SDNPMemOperand, SDNPHasChain, SDNPMayStore]>;
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
+ SDTBufferStore,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
+def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
+ SDTBufferStore,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
class SDBufferAtomic<string opcode> : SDNode <opcode,
SDTypeProfile<1, 5,
@@ -1882,6 +1896,11 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.VI)],
[!cast<string>(SIEncodingFamily.SDWA)],
[!cast<string>(SIEncodingFamily.SDWA9)],
+ // GFX80 encoding is added to work around a multiple matching
+ // issue for buffer instructions with unpacked d16 data. This
+ // does not actually change the encoding, and thus may be
+ // removed later.
+ [!cast<string>(SIEncodingFamily.GFX80)],
[!cast<string>(SIEncodingFamily.GFX9)]];
}
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll?rev=322402&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll Fri Jan 12 13:12:19 2018
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
+
+; GCN-LABEL: {{^}}buffer_load_format_d16_x:
+; GCN: buffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0
+define amdgpu_ps half @buffer_load_format_d16_x(<4 x i32> inreg %rsrc) {
+main_body:
+ %data = call half @llvm.amdgcn.buffer.load.format.f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
+ ret half %data
+}
+
+; GCN-LABEL: {{^}}buffer_load_format_d16_xy:
+; UNPACKED: buffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: buffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0
+; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]]
+define amdgpu_ps half @buffer_load_format_d16_xy(<4 x i32> inreg %rsrc) {
+main_body:
+ %data = call <2 x half> @llvm.amdgcn.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
+ %elt = extractelement <2 x half> %data, i32 1
+ ret half %elt
+}
+
+; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw:
+; UNPACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: buffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], 0
+; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
+define amdgpu_ps half @buffer_load_format_d16_xyzw(<4 x i32> inreg %rsrc) {
+main_body:
+ %data = call <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
+ %elt = extractelement <4 x half> %data, i32 3
+ ret half %elt
+}
+
+declare half @llvm.amdgcn.buffer.load.format.f16(<4 x i32>, i32, i32, i1, i1)
+declare <2 x half> @llvm.amdgcn.buffer.load.format.v2f16(<4 x i32>, i32, i32, i1, i1)
+declare <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(<4 x i32>, i32, i32, i1, i1)
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll?rev=322402&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll Fri Jan 12 13:12:19 2018
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+
+; GCN-LABEL: {{^}}buffer_store_format_d16_x:
+; GCN: v_trunc_f16_e32 v[[LO:[0-9]+]], s{{[0-9]+}}
+; GCN: buffer_store_format_d16_x v[[LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+define amdgpu_kernel void @buffer_store_format_d16_x(<4 x i32> %rsrc, half %data, i32 %index) {
+main_body:
+ call void @llvm.amdgcn.buffer.store.format.f16(half %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}buffer_store_format_d16_xy:
+
+; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: buffer_store_format_d16_xy v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+
+; PACKED: buffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+define amdgpu_kernel void @buffer_store_format_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %index) {
+main_body:
+ call void @llvm.amdgcn.buffer.store.format.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
+ ret void
+}
+
+; GCN-LABEL: {{^}}buffer_store_format_d16_xyzw:
+
+; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+
+; GFX81: v_or_b32_e32 v[[HI:[0-9]+]]
+; GFX81: v_or_b32_e32 v[[LO:[0-9]+]]
+
+; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]]
+; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]]
+
+; PACKED: buffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
+define amdgpu_kernel void @buffer_store_format_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
+main_body:
+ call void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
+ ret void
+}
+
+declare void @llvm.amdgcn.buffer.store.format.f16(half, <4 x i32>, i32, i32, i1, i1)
+declare void @llvm.amdgcn.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i1, i1)
+declare void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i1, i1)
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll?rev=322402&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll Fri Jan 12 13:12:19 2018
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
+
+; GCN-LABEL: {{^}}tbuffer_load_d16_x:
+; GCN: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
+main_body:
+ %data = call half @llvm.amdgcn.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0)
+ ret half %data
+}
+
+; GCN-LABEL: {{^}}tbuffer_load_d16_xy:
+; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]]
+define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
+main_body:
+ %data = call <2 x half> @llvm.amdgcn.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0)
+ %elt = extractelement <2 x half> %data, i32 1
+ ret half %elt
+}
+
+; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
+; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+
+; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
+define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
+main_body:
+ %data = call <4 x half> @llvm.amdgcn.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0, i32 6, i32 1, i1 0, i1 0)
+ %elt = extractelement <4 x half> %data, i32 3
+ ret half %elt
+}
+
+declare half @llvm.amdgcn.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
+declare <2 x half> @llvm.amdgcn.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
+declare <4 x half> @llvm.amdgcn.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll?rev=322402&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll Fri Jan 12 13:12:19 2018
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+
+
+; GCN-LABEL: {{^}}tbuffer_store_d16_x:
+; GCN: v_trunc_f16_e32 v[[LO:[0-9]+]], s{{[0-9]+}}
+; GCN: tbuffer_store_format_d16_x v[[LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) {
+main_body:
+ call void @llvm.amdgcn.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}tbuffer_store_d16_xy:
+
+; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+
+; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) {
+main_body:
+ call void @llvm.amdgcn.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
+
+; UNPACKED: flat_load_ushort v[[HI:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: flat_load_ushort v[[LO:[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc slc
+; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+
+; GFX81: v_or_b32_e32 v[[HI:[0-9]+]]
+; GFX81: v_or_b32_e32 v[[LO:[0-9]+]]
+
+; GFX9: v_mov_b32_e32 v[[LO:[0-9]+]]
+; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]]
+
+; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
+main_body:
+ call void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
+ ret void
+}
+
+declare void @llvm.amdgcn.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
+declare void @llvm.amdgcn.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
+declare void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
Added: llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-packed.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-packed.s?rev=322402&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-packed.s (added)
+++ llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-packed.s Fri Jan 12 13:12:19 2018
@@ -0,0 +1,74 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=PACKED %s
+
+// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding 2>&1 %s | FileCheck -check-prefix=UNPACKED-ERR -check-prefix=GCN-ERR %s
+
+
+//===----------------------------------------------------------------------===//
+// Buffer Format Instructions.
+//===----------------------------------------------------------------------===//
+
+buffer_load_format_d16_x v1, off, s[4:7], s1
+// PACKED: buffer_load_format_d16_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_format_d16_xy v1, off, s[4:7], s1
+// PACKED: buffer_load_format_d16_xy v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xyz v[1:2], off, s[4:7], s1
+// PACKED: buffer_load_format_d16_xyz v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1
+// PACKED: buffer_load_format_d16_xyzw v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+buffer_store_format_d16_x v1, off, s[4:7], s1
+// PACKED: buffer_store_format_d16_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_format_d16_xy v1, off, s[4:7], s1
+// PACKED: buffer_store_format_d16_xy v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xyz v[1:2], off, s[4:7], s1
+// PACKED: buffer_store_format_d16_xyz v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1
+// PACKED: buffer_store_format_d16_xyzw v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+
+//===----------------------------------------------------------------------===//
+// TBuffer Format Instructions.
+//===----------------------------------------------------------------------===//
+
+tbuffer_load_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_load_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7c,0xe9,0x00,0x01,0x01,0x01]
+
+tbuffer_load_format_d16_xy v1, off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_load_format_d16_xy v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7c,0xe9,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_load_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7d,0xe9,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xyzw v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_load_format_d16_xyzw v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7d,0xe9,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_store_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7e,0xe9,0x00,0x01,0x01,0x01]
+
+tbuffer_store_format_d16_xy v1, off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_store_format_d16_xy v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7e,0xe9,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_store_format_d16_xyz v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xyzw v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// PACKED: tbuffer_store_format_d16_xyzw v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7f,0xe9,0x00,0x01,0x01,0x01]
+// UNPACKED-ERR: error: instruction not supported on this GPU
Added: llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-unpacked.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-unpacked.s?rev=322402&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-unpacked.s (added)
+++ llvm/trunk/test/MC/AMDGPU/buf-fmt-d16-unpacked.s Fri Jan 12 13:12:19 2018
@@ -0,0 +1,73 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=PACKED-ERR -check-prefix=GCN-ERR %s
+
+
+//===----------------------------------------------------------------------===//
+// Buffer Format Instructions.
+//===----------------------------------------------------------------------===//
+
+buffer_load_format_d16_x v1, off, s[4:7], s1
+// UNPACKED: buffer_load_format_d16_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_load_format_d16_xy v[1:2], off, s[4:7], s1
+// UNPACKED: buffer_load_format_d16_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xyz v[1:3], off, s[4:7], s1
+// UNPACKED: buffer_load_format_d16_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+buffer_load_format_d16_xyzw v[1:4], off, s[4:7], s1
+// UNPACKED: buffer_load_format_d16_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+buffer_store_format_d16_x v1, off, s[4:7], s1
+// UNPACKED: buffer_store_format_d16_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+
+buffer_store_format_d16_xy v[1:2], off, s[4:7], s1
+// UNPACKED: buffer_store_format_d16_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xyz v[1:3], off, s[4:7], s1
+// UNPACKED: buffer_store_format_d16_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+buffer_store_format_d16_xyzw v[1:4], off, s[4:7], s1
+// UNPACKED: buffer_store_format_d16_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+
+//===----------------------------------------------------------------------===//
+// TBuffer Format Instructions.
+//===----------------------------------------------------------------------===//
+
+tbuffer_load_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_load_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7c,0xe9,0x00,0x01,0x01,0x01]
+
+tbuffer_load_format_d16_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_load_format_d16_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7c,0xe9,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xyz v[1:3], off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_load_format_d16_xyz v[1:3], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7d,0xe9,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_load_format_d16_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_load_format_d16_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7d,0xe9,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_store_format_d16_x v1, off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7e,0xe9,0x00,0x01,0x01,0x01]
+
+tbuffer_store_format_d16_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_store_format_d16_xy v[1:2], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7e,0xe9,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xyz v[1:3], off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_store_format_d16_xyz v[1:3], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x00,0x7f,0xe9,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
+
+tbuffer_store_format_d16_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1
+// UNPACKED: tbuffer_store_format_d16_xyzw v[1:4], off, s[4:7], dfmt:15, nfmt:2, s1 ; encoding: [0x00,0x80,0x7f,0xe9,0x00,0x01,0x01,0x01]
+// PACKED-ERR: error: instruction not supported on this GPU
More information about the llvm-commits mailing list