[llvm] r359698 - [AMDGPU] gfx1010 MIMG implementation
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed May 1 09:32:59 PDT 2019
Author: rampitec
Date: Wed May 1 09:32:58 2019
New Revision: 359698
URL: http://llvm.org/viewvc/llvm-project?rev=359698&view=rev
Log:
[AMDGPU] gfx1010 MIMG implementation
Differential Revision: https://reviews.llvm.org/D61339
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg.s
llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg_err.s
llvm/trunk/test/MC/AMDGPU/mtbuf-gfx10.s
llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
llvm/trunk/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/include/llvm/MC/MCInst.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Wed May 1 09:32:58 2019
@@ -473,9 +473,12 @@ class arglistconcat<list<list<AMDGPUArg>
}
// Represent texture/image types / dimensionality.
-class AMDGPUDimProps<string name, list<string> coord_names, list<string> slice_names> {
+class AMDGPUDimProps<bits<3> enc, string name, string asmsuffix,
+ list<string> coord_names, list<string> slice_names> {
AMDGPUDimProps Dim = !cast<AMDGPUDimProps>(NAME);
string Name = name; // e.g. "2darraymsaa"
+ string AsmSuffix = asmsuffix; // e.g. 2D_MSAA_ARRAY (used in assembly strings)
+ bits<3> Encoding = enc;
bit DA = 0; // DA bit in MIMG encoding
list<AMDGPUArg> CoordSliceArgs =
@@ -491,17 +494,17 @@ class AMDGPUDimProps<string name, list<s
bits<8> NumGradients = !size(GradientArgs);
}
-def AMDGPUDim1D : AMDGPUDimProps<"1d", ["s"], []>;
-def AMDGPUDim2D : AMDGPUDimProps<"2d", ["s", "t"], []>;
-def AMDGPUDim3D : AMDGPUDimProps<"3d", ["s", "t", "r"], []>;
+def AMDGPUDim1D : AMDGPUDimProps<0x0, "1d", "1D", ["s"], []>;
+def AMDGPUDim2D : AMDGPUDimProps<0x1, "2d", "2D", ["s", "t"], []>;
+def AMDGPUDim3D : AMDGPUDimProps<0x2, "3d", "3D", ["s", "t", "r"], []>;
let DA = 1 in {
- def AMDGPUDimCube : AMDGPUDimProps<"cube", ["s", "t"], ["face"]>;
- def AMDGPUDim1DArray : AMDGPUDimProps<"1darray", ["s"], ["slice"]>;
- def AMDGPUDim2DArray : AMDGPUDimProps<"2darray", ["s", "t"], ["slice"]>;
+ def AMDGPUDimCube : AMDGPUDimProps<0x3, "cube", "CUBE", ["s", "t"], ["face"]>;
+ def AMDGPUDim1DArray : AMDGPUDimProps<0x4, "1darray", "1D_ARRAY", ["s"], ["slice"]>;
+ def AMDGPUDim2DArray : AMDGPUDimProps<0x5, "2darray", "2D_ARRAY", ["s", "t"], ["slice"]>;
}
-def AMDGPUDim2DMsaa : AMDGPUDimProps<"2dmsaa", ["s", "t"], ["fragid"]>;
+def AMDGPUDim2DMsaa : AMDGPUDimProps<0x6, "2dmsaa", "2D_MSAA", ["s", "t"], ["fragid"]>;
let DA = 1 in {
- def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<"2darraymsaa", ["s", "t"], ["slice", "fragid"]>;
+ def AMDGPUDim2DArrayMsaa : AMDGPUDimProps<0x7, "2darraymsaa", "2D_MSAA_ARRAY", ["s", "t"], ["slice", "fragid"]>;
}
def AMDGPUDims {
@@ -678,7 +681,7 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimP
!if(P_.IsSample, [llvm_v4i32_ty, // samp(SGPR)
llvm_i1_ty], []), // unorm(imm)
[llvm_i32_ty, // texfailctrl(imm; bit 0 = tfe, bit 1 = lwe)
- llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty]), // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc)
!listconcat(props,
!if(P_.IsAtomic, [], [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>]),
!if(P_.IsSample, [ImmArg<AMDGPUImageDimIntrinsicEval<P_>.UnormArgIndex>], []),
@@ -884,7 +887,7 @@ class AMDGPURawBufferLoad : Intrinsic <
[llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad;
@@ -896,7 +899,7 @@ class AMDGPUStructBufferLoad : Intrinsic
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
@@ -908,7 +911,7 @@ class AMDGPURawBufferStore : Intrinsic <
llvm_v4i32_ty, // rsrc(SGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore;
@@ -921,7 +924,7 @@ class AMDGPUStructBufferStore : Intrinsi
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
@@ -1031,7 +1034,7 @@ def int_amdgcn_raw_tbuffer_load : Intrin
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
@@ -1042,7 +1045,7 @@ def int_amdgcn_raw_tbuffer_store : Intri
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
@@ -1053,7 +1056,7 @@ def int_amdgcn_struct_tbuffer_load : Int
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
@@ -1065,7 +1068,7 @@ def int_amdgcn_struct_tbuffer_store : In
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
- llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
+ llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
[IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
Modified: llvm/trunk/include/llvm/MC/MCInst.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCInst.h?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCInst.h (original)
+++ llvm/trunk/include/llvm/MC/MCInst.h Wed May 1 09:32:58 2019
@@ -187,6 +187,7 @@ public:
void clear() { Operands.clear(); }
void erase(iterator I) { Operands.erase(I); }
+ void erase(iterator First, iterator Last) { Operands.erase(First, Last); }
size_t size() const { return Operands.size(); }
iterator begin() { return Operands.begin(); }
const_iterator begin() const { return Operands.begin(); }
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed May 1 09:32:58 2019
@@ -155,6 +155,7 @@ public:
ImmTySdwaSrc1Sel,
ImmTySdwaDstUnused,
ImmTyDMask,
+ ImmTyDim,
ImmTyUNorm,
ImmTyDA,
ImmTyR128A16,
@@ -296,6 +297,7 @@ public:
bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
+ bool isDim() const { return isImmTy(ImmTyDim); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128A16() const { return isImmTy(ImmTyR128A16); }
@@ -695,6 +697,7 @@ public:
case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
case ImmTyDMask: OS << "DMask"; break;
+ case ImmTyDim: OS << "Dim"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
case ImmTyR128A16: OS << "R128A16"; break;
@@ -926,6 +929,10 @@ public:
enum AMDGPUMatchResultTy {
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
+ // Selects how parseOperand() treats the operand it is about to parse.
+ enum OperandMode {
+ // Regular operand parsing.
+ OperandMode_Default,
+ // Additionally accept a '['-delimited, comma-separated register list.
+ // ParseInstruction() requests this for "image_" mnemonics on gfx10 while
+ // exactly two entries are in the operand vector.
+ OperandMode_NSA,
+ };
using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
@@ -1065,7 +1072,8 @@ public:
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
+ OperandMode Mode = OperandMode_Default);
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -1133,7 +1141,9 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
+ bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
+ bool validateMIMGDim(const MCInst &Inst);
bool validateLdsDirect(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -1211,6 +1221,7 @@ public:
bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDim(OperandVector &Operands);
OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
AMDGPUOperand::Ptr defaultRowMask() const;
AMDGPUOperand::Ptr defaultBankMask() const;
@@ -2565,6 +2576,46 @@ bool AMDGPUAsmParser::validateMIMGDataSi
return (VDataSize / 4) == DataSize + TFESize;
}
+// Checks that the address operand(s) of a gfx10 image (MIMG) instruction have
+// the size implied by its base opcode and 'dim' operand.
+//
+// Returns true when \p Inst is not a MIMG instruction, when the target is not
+// gfx10, or when the sizes agree; returns false on a mismatch.
+bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
+ const unsigned Opc = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opc);
+
+ // Nothing to validate outside gfx10 MIMG instructions.
+ if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
+ return true;
+
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+ int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+
+ // The code below indexes operands with these values, so all three must
+ // exist and srsrc must come after vaddr0.
+ assert(VAddr0Idx != -1);
+ assert(SrsrcIdx != -1);
+ assert(DimIdx != -1);
+ assert(SrsrcIdx > VAddr0Idx);
+
+ unsigned Dim = Inst.getOperand(DimIdx).getImm();
+ // NOTE(review): DimInfo is dereferenced below without a null check; this
+ // presumably relies on validateMIMGDim() having rejected bad encodings first.
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
+ // More than one operand slot between vaddr0 and srsrc is treated as NSA.
+ bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
+ // NSA: one unit per address operand. Otherwise the size of the single vaddr0
+ // register operand divided by 4 (presumably bytes to dwords -- confirm).
+ unsigned VAddrSize =
+ IsNSA ? SrsrcIdx - VAddr0Idx
+ : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
+
+ // Expected size: extra args, plus gradients and coordinates for this dim
+ // where the base opcode uses them, plus one for LodOrClampOrMip.
+ unsigned AddrSize = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ // In the non-NSA form, sizes above 4 are rounded up to 8 and sizes above 8
+ // are rounded up to 16 before comparing.
+ if (!IsNSA) {
+ if (AddrSize > 8)
+ AddrSize = 16;
+ else if (AddrSize > 4)
+ AddrSize = 8;
+ }
+
+ return VAddrSize == AddrSize;
+}
+
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2621,6 +2672,24 @@ bool AMDGPUAsmParser::validateMIMGD16(co
return true;
}
+// Checks the 'dim' operand of an image (MIMG) instruction.
+//
+// Returns true when \p Inst is not a MIMG instruction, when its opcode has no
+// 'dim' operand, or when the operand holds a value in [0, 8). Returns false
+// for any other value, including a negative immediate.
+bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+    return true;
+
+  const int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
+  if (DimIdx < 0)
+    return true;
+
+  // Keep the immediate at its full 64-bit width: 'long' is only 32 bits on
+  // LLP64 hosts, so narrowing before the range check could let an
+  // out-of-range value wrap into [0, 8).
+  const int64_t Imm = Inst.getOperand(DimIdx).getImm();
+  return Imm >= 0 && Imm < 8;
+}
+
static bool IsRevOpcode(const unsigned Opcode)
{
switch (Opcode) {
@@ -2853,11 +2922,20 @@ bool AMDGPUAsmParser::validateInstructio
"d16 modifier is not supported on this GPU");
return false;
}
+ if (!validateMIMGDim(Inst)) {
+ Error(IDLoc, "dim modifier is required on this GPU");
+ return false;
+ }
if (!validateMIMGDataSize(Inst)) {
Error(IDLoc,
"image data size does not match dmask and tfe");
return false;
}
+ if (!validateMIMGAddrSize(Inst)) {
+ Error(IDLoc,
+ "image address size does not match dim and a16");
+ return false;
+ }
if (!validateMIMGAtomicDMask(Inst)) {
Error(IDLoc,
"invalid atomic image dmask");
@@ -3217,6 +3295,24 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
IDRange);
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
ValRange);
+ } else if (ID == ".amdhsa_workgroup_processor_mode") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_memory_ordered") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+ ValRange);
+ } else if (ID == ".amdhsa_forward_progress") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+ ValRange);
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -3370,6 +3466,22 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
return TokError(Err.str());
}
Lex();
+
+ if (ID == "enable_wgp_mode") {
+ if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
+ return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
+ }
+
+ if (ID == "enable_mem_ordered") {
+ if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
+ return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
+ }
+
+ if (ID == "enable_fwd_progress") {
+ if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
+ return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
+ }
+
return false;
}
@@ -3669,7 +3781,8 @@ bool AMDGPUAsmParser::subtargetHasRegist
}
OperandMatchResultTy
-AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
+AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
+ OperandMode Mode) {
// Try to parse with a custom parser
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -3683,6 +3796,35 @@ AMDGPUAsmParser::parseOperand(OperandVec
getLexer().is(AsmToken::EndOfStatement))
return ResTy;
+ if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
+ unsigned Prefix = Operands.size();
+ SMLoc LBraceLoc = getTok().getLoc();
+ Parser.Lex(); // eat the '['
+
+ for (;;) {
+ ResTy = parseReg(Operands);
+ if (ResTy != MatchOperand_Success)
+ return ResTy;
+
+ if (getLexer().is(AsmToken::RBrac))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ }
+
+ if (Operands.size() - Prefix > 1) {
+ Operands.insert(Operands.begin() + Prefix,
+ AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
+ getTok().getLoc()));
+ }
+
+ Parser.Lex(); // eat the ']'
+ return MatchOperand_Success;
+ }
+
ResTy = parseRegOrImm(Operands);
if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
@@ -3736,8 +3878,13 @@ bool AMDGPUAsmParser::ParseInstruction(P
Name = parseMnemonicSuffix(Name);
Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
+ bool IsMIMG = Name.startswith("image_");
+
while (!getLexer().is(AsmToken::EndOfStatement)) {
- OperandMatchResultTy Res = parseOperand(Operands, Name);
+ OperandMode Mode = OperandMode_Default;
+ if (IsMIMG && isGFX10() && Operands.size() == 2)
+ Mode = OperandMode_NSA;
+ OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
// Eat the comma or space if there is one.
if (getLexer().is(AsmToken::Comma))
@@ -5275,7 +5422,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
Op.addRegOperands(Inst, 1);
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
- } else {
+ } else if (!Op.isToken()) {
llvm_unreachable("unexpected operand type");
}
}
@@ -5283,6 +5430,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
bool IsGFX10 = isGFX10();
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
+ if (IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
if (IsGFX10)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
@@ -5291,7 +5440,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+ if (!IsGFX10)
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
@@ -5389,7 +5539,7 @@ static const OptionalOperand AMDGPUOptio
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
{"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
- {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
+ {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -5404,6 +5554,7 @@ static const OptionalOperand AMDGPUOptio
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
+ {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
{"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
{"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
{"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
@@ -5472,7 +5623,9 @@ OperandMatchResultTy AMDGPUAsmParser::pa
Op.Type == AMDGPUOperand::ImmTyNegHi) {
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
Op.ConvertResult);
- } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
+ } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
+ res = parseDim(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
res = parseDfmtNfmt(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
@@ -5758,6 +5911,52 @@ bool AMDGPUOperand::isU16Imm() const {
return isImm() && isUInt<16>(getImm());
}
+// Parses a gfx10 image dimension modifier of the form "dim:<name>", where
+// <name> is an asm suffix such as 1D, optionally prefixed with SQ_RSRC_IMG_.
+//
+// Returns MatchOperand_NoMatch when the target is not gfx10 or the current
+// token is not the identifier "dim" (nothing is consumed in that case),
+// MatchOperand_ParseFail when "dim" is not followed by a well-formed, known
+// dimension name, and MatchOperand_Success after appending an ImmTyDim
+// immediate holding the dimension's encoding to \p Operands.
+OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+ if (!isGFX10())
+ return MatchOperand_NoMatch;
+
+ // Location of the "dim" token; used as the location of the new operand.
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
+ if (getLexer().getTok().getString() != "dim")
+ return MatchOperand_NoMatch;
+
+ // From here on "dim" has been consumed, so failures are hard parse errors.
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+
+ // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
+ // integer.
+ std::string Token;
+ if (getLexer().is(AsmToken::Integer)) {
+ SMLoc Loc = getLexer().getTok().getEndLoc();
+ Token = getLexer().getTok().getString();
+ Parser.Lex();
+ // The next token must start exactly where the integer ended, i.e. nothing
+ // may separate the two halves of the name.
+ if (getLexer().getTok().getLoc() != Loc)
+ return MatchOperand_ParseFail;
+ }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return MatchOperand_ParseFail;
+ Token += getLexer().getTok().getString();
+
+ // Accept the long spelling too: drop the 12-character "SQ_RSRC_IMG_" prefix.
+ StringRef DimId = Token;
+ if (DimId.startswith("SQ_RSRC_IMG_"))
+ DimId = DimId.substr(12);
+
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
+ if (!DimInfo)
+ return MatchOperand_ParseFail;
+
+ // Consume the identifier only once the name is known to be valid.
+ Parser.Lex();
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
+ AMDGPUOperand::ImmTyDim));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
using namespace AMDGPU::DPP;
Modified: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp Wed May 1 09:32:58 2019
@@ -290,7 +290,26 @@ DecodeStatus AMDGPUDisassembler::getInst
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
- Res = convertMIMGInst(MI);
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ int RsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
+ unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
+ if (VAddr0Idx >= 0 && NSAArgs > 0) {
+ unsigned NSAWords = (NSAArgs + 3) / 4;
+ if (Bytes.size() < 4 * NSAWords) {
+ Res = MCDisassembler::Fail;
+ } else {
+ for (unsigned i = 0; i < NSAArgs; ++i) {
+ MI.insert(MI.begin() + VAddr0Idx + 1 + i,
+ decodeOperand_VGPR_32(Bytes[i]));
+ }
+ Bytes = Bytes.slice(4 * NSAWords);
+ }
+ }
+
+ if (Res)
+ Res = convertMIMGInst(MI);
}
if (Res && IsSDWA)
@@ -339,9 +358,9 @@ DecodeStatus AMDGPUDisassembler::convert
return MCDisassembler::Success;
}
-// Note that MIMG format provides no information about VADDR size.
-// Consequently, decoded instructions always show address
-// as if it has 1 dword, which could be not really so.
+// Note that before gfx10, the MIMG encoding provided no information about
+// VADDR size. Consequently, decoded instructions always show the address as
+// if it has 1 dword, which may not reflect its actual size.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
@@ -349,7 +368,8 @@ DecodeStatus AMDGPUDisassembler::convert
int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdata);
-
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -362,16 +382,42 @@ DecodeStatus AMDGPUDisassembler::convert
assert(DMaskIdx != -1);
assert(TFEIdx != -1);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
- unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
- if (DMask == 0)
- return MCDisassembler::Success;
+ bool IsNSA = false;
+ unsigned AddrSize = Info->VAddrDwords;
- unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
- if (DstSize == 1)
- return MCDisassembler::Success;
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ unsigned DimIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ const AMDGPU::MIMGDimInfo *Dim =
+ AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
+
+ AddrSize = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA;
+ if (!IsNSA) {
+ if (AddrSize > 8)
+ AddrSize = 16;
+ else if (AddrSize > 4)
+ AddrSize = 8;
+ } else {
+ if (AddrSize > Info->VAddrDwords) {
+ // The NSA encoding does not contain enough operands for the combination
+ // of base opcode / dimension. Should this be an error?
+ return MCDisassembler::Success;
+ }
+ }
+ }
+
+ unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
+ unsigned DstSize = IsGather4 ? 4 : std::max(countPopulation(DMask), 1u);
bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
if (D16 && AMDGPU::hasPackedD16(STI)) {
@@ -382,44 +428,64 @@ DecodeStatus AMDGPUDisassembler::convert
if (MI.getOperand(TFEIdx).getImm())
return MCDisassembler::Success;
- int NewOpcode = -1;
+ if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
+ return MCDisassembler::Success;
- if (IsGather4) {
- if (D16 && AMDGPU::hasPackedD16(STI))
- NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
- else
+ int NewOpcode =
+ AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
+ if (NewOpcode == -1)
+ return MCDisassembler::Success;
+
+ // Widen the register to the correct number of enabled channels.
+ unsigned NewVdata = AMDGPU::NoRegister;
+ if (DstSize != Info->VDataDwords) {
+ auto DataRCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+
+ // Get first subregister of VData
+ unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
+ unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
+ Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+
+ NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
+ &MRI.getRegClass(DataRCID));
+ if (NewVdata == AMDGPU::NoRegister) {
+ // It's possible to encode this such that the low register + enabled
+ // components exceeds the register count.
return MCDisassembler::Success;
- } else {
- NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
- if (NewOpcode == -1)
+ }
+ }
+
+ unsigned NewVAddr0 = AMDGPU::NoRegister;
+ if (STI.getFeatureBits()[AMDGPU::FeatureGFX10] && !IsNSA &&
+ AddrSize != Info->VAddrDwords) {
+ unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
+ unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
+ VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;
+
+ auto AddrRCID = MCII->get(NewOpcode).OpInfo[VAddr0Idx].RegClass;
+ NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
+ &MRI.getRegClass(AddrRCID));
+ if (NewVAddr0 == AMDGPU::NoRegister)
return MCDisassembler::Success;
}
- auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+ MI.setOpcode(NewOpcode);
- // Get first subregister of VData
- unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
- unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
- Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
+ if (NewVdata != AMDGPU::NoRegister) {
+ MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
- // Widen the register to the correct number of enabled channels.
- auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
- &MRI.getRegClass(RCID));
- if (NewVdata == AMDGPU::NoRegister) {
- // It's possible to encode this such that the low register + enabled
- // components exceeds the register count.
- return MCDisassembler::Success;
+ if (IsAtomic) {
+ // Atomic operations have an additional operand (a copy of data)
+ MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+ }
}
- MI.setOpcode(NewOpcode);
- // vaddr will be always appear as a single VGPR. This will look different than
- // how it is usually emitted because the number of register components is not
- // in the instruction encoding.
- MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
-
- if (IsAtomic) {
- // Atomic operations have an additional operand (a copy of data)
- MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
+ if (NewVAddr0 != AMDGPU::NoRegister) {
+ MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
+ } else if (IsNSA) {
+ assert(AddrSize <= Info->VAddrDwords);
+ MI.erase(MI.begin() + VAddr0Idx + AddrSize,
+ MI.begin() + VAddr0Idx + Info->VAddrDwords);
}
return MCDisassembler::Success;
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed May 1 09:32:58 2019
@@ -208,6 +208,18 @@ void AMDGPUInstPrinter::printDMask(const
}
}
+// Prints the image dimension operand as " dim:SQ_RSRC_IMG_<suffix>". When the
+// encoding has no MIMGDimInfo entry, the raw number is printed in place of
+// the suffix.
+void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) {
+  const unsigned Encoding = MI->getOperand(OpNo).getImm();
+  O << " dim:SQ_RSRC_IMG_";
+
+  if (const AMDGPU::MIMGDimInfo *Info =
+          AMDGPU::getMIMGDimInfoByEncoding(Encoding)) {
+    O << Info->AsmSuffix;
+    return;
+  }
+
+  // Unknown encoding: fall back to the numeric value.
+  O << Encoding;
+}
+
void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "unorm");
@@ -254,8 +266,12 @@ void AMDGPUInstPrinter::printFORMAT(cons
const MCSubtargetInfo &STI,
raw_ostream &O) {
if (unsigned Val = MI->getOperand(OpNo).getImm()) {
- O << " dfmt:" << (Val & 15);
- O << ", nfmt:" << (Val >> 4);
+ if (AMDGPU::isGFX10(STI))
+ O << " format:" << Val;
+ else {
+ O << " dfmt:" << (Val & 15);
+ O << ", nfmt:" << (Val >> 4);
+ }
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Wed May 1 09:32:58 2019
@@ -78,6 +78,8 @@ private:
raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp Wed May 1 09:32:58 2019
@@ -16,6 +16,7 @@
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -273,7 +274,25 @@ void SIMCCodeEmitter::encodeInstruction(
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
}
- if (bytes > 4)
+ // NSA encoding.
+ if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
+ int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vaddr0);
+ int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::srsrc);
+ assert(vaddr0 >= 0 && srsrc > vaddr0);
+ unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
+ unsigned NumPadding = (-NumExtraAddrs) & 3;
+
+ for (unsigned i = 0; i < NumExtraAddrs; ++i)
+ OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
+ Fixups, STI));
+ for (unsigned i = 0; i < NumPadding; ++i)
+ OS.write(0);
+ }
+
+ if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
+ (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
return;
// Check for additional literals in SRC0/1/2 (Op 1/2/3)
@@ -428,7 +447,8 @@ uint64_t SIMCCodeEmitter::getMachineOpVa
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
- if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
+ if (Enc != ~0U &&
+ (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
return Enc;
} else if (MO.isImm())
Modified: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td Wed May 1 09:32:58 2019
@@ -11,10 +11,14 @@
//
// - MIMGEncGfx6: encoding introduced with gfx6 (obsoleted for atomics in gfx8)
// - MIMGEncGfx8: encoding introduced with gfx8 for atomics
+// - MIMGEncGfx10Default: gfx10 default (non-NSA) encoding
+// - MIMGEncGfx10NSA: gfx10 NSA encoding
class MIMGEncoding;
def MIMGEncGfx6 : MIMGEncoding;
def MIMGEncGfx8 : MIMGEncoding;
+def MIMGEncGfx10Default : MIMGEncoding;
+def MIMGEncGfx10NSA : MIMGEncoding;
def MIMGEncoding : GenericEnum {
let FilterClass = "MIMGEncoding";
@@ -59,13 +63,28 @@ def MIMGDim : GenericEnum {
def MIMGDimInfoTable : GenericTable {
let FilterClass = "AMDGPUDimProps";
let CppTypeName = "MIMGDimInfo";
- let Fields = ["Dim", "NumCoords", "NumGradients", "DA"];
+ let Fields = ["Dim", "NumCoords", "NumGradients", "DA", "Encoding", "AsmSuffix"];
GenericEnum TypeOf_Dim = MIMGDim;
let PrimaryKey = ["Dim"];
let PrimaryKeyName = "getMIMGDimInfo";
}
+def getMIMGDimInfoByEncoding : SearchIndex {
+ let Table = MIMGDimInfoTable;
+ let Key = ["Encoding"];
+}
+
+def getMIMGDimInfoByAsmSuffix : SearchIndex {
+ let Table = MIMGDimInfoTable;
+ let Key = ["AsmSuffix"];
+}
+
+class mimg <bits<7> si_gfx10, bits<7> vi = si_gfx10> {
+ field bits<7> SI_GFX10 = si_gfx10;
+ field bits<7> VI = vi;
+}
+
class MIMGLZMapping<MIMGBaseOpcode l, MIMGBaseOpcode lz> {
MIMGBaseOpcode L = l;
MIMGBaseOpcode LZ = lz;
@@ -82,11 +101,6 @@ def MIMGLZMappingTable : GenericTable {
let PrimaryKeyName = "getMIMGLZMappingInfo";
}
-class mimg <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
-}
-
class MIMG <dag outs, string dns = "">
: InstSI <outs, (ins), "", []> {
@@ -108,7 +122,7 @@ class MIMG <dag outs, string dns = "">
Instruction Opcode = !cast<Instruction>(NAME);
MIMGBaseOpcode BaseOpcode;
- MIMGEncoding MIMGEncoding = MIMGEncGfx6;
+ MIMGEncoding MIMGEncoding;
bits<8> VDataDwords;
bits<8> VAddrDwords;
}
@@ -129,15 +143,66 @@ def getMIMGInfo : SearchIndex {
let Key = ["Opcode"];
}
+// This is a separate class so that TableGen memoizes the computations.
+class MIMGNSAHelper<int num_addrs> {
+ list<string> AddrAsmNames =
+ !foldl([]<string>, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], lhs, i,
+ !if(!lt(i, num_addrs), !listconcat(lhs, ["vaddr"#!size(lhs)]), lhs));
+ dag AddrIns = !dag(ins, !foreach(arg, AddrAsmNames, VGPR_32), AddrAsmNames);
+ string AddrAsm = "[" # !foldl("$" # !head(AddrAsmNames), !tail(AddrAsmNames), lhs, rhs,
+ lhs # ", $" # rhs) # "]";
+
+ int NSA = !if(!le(num_addrs, 1), ?,
+ !if(!le(num_addrs, 5), 1,
+ !if(!le(num_addrs, 9), 2,
+ !if(!le(num_addrs, 13), 3, ?))));
+}
+
+// Base class of all pre-gfx10 MIMG instructions.
+class MIMG_gfx6789<bits<7> op, dag outs, string dns = "">
+ : MIMG<outs, dns>, MIMGe_gfx6789<op> {
+ let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
+ let AssemblerPredicates = [isGFX6GFX7GFX8GFX9];
+
+ let MIMGEncoding = MIMGEncGfx6;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+}
+
+// Base class of all non-NSA gfx10 MIMG instructions.
+class MIMG_gfx10<int op, dag outs, string dns = "">
+ : MIMG<outs, dns>, MIMGe_gfx10<op> {
+ let SubtargetPredicate = isGFX10Plus;
+ let AssemblerPredicates = [isGFX10Plus];
+
+ let MIMGEncoding = MIMGEncGfx10Default;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let nsa = 0;
+}
+
+// Base class for all NSA MIMG instructions. Note that 1-dword addresses always
+// use non-NSA variants.
+class MIMG_nsa_gfx10<int op, dag outs, int num_addrs, string dns="">
+ : MIMG<outs, dns>, MIMGe_gfx10<op> {
+ let SubtargetPredicate = isGFX10Plus;
+ let AssemblerPredicates = [isGFX10Plus];
+
+ let MIMGEncoding = MIMGEncGfx10NSA;
+
+ MIMGNSAHelper nsah = MIMGNSAHelper<num_addrs>;
+ dag AddrIns = nsah.AddrIns;
+ string AddrAsm = nsah.AddrAsm;
+
+ let d16 = !if(BaseOpcode.HasD16, ?, 0);
+ let nsa = nsah.NSA;
+}
+
class MIMG_NoSampler_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
RegisterClass addr_rc,
string dns="">
- : MIMG <(outs dst_rc:$vdata), dns>,
- MIMGe<op> {
- let ssamp = 0;
- let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
+ : MIMG_gfx6789 <op, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -146,18 +211,61 @@ class MIMG_NoSampler_Helper <bits<7> op,
#!if(BaseOpcode.HasD16, "$d16", "");
}
+class MIMG_NoSampler_gfx10<int op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_NoSampler_nsa_gfx10<int op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx10<op, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
RegisterClass dst_rc,
bit enableDisasm> {
- let VAddrDwords = 1 in
- def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
- let VAddrDwords = 2 in
- def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
- let VAddrDwords = 3 in
- def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
- let VAddrDwords = 4 in
- def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
+ let ssamp = 0 in {
+ let VAddrDwords = 1 in {
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ def _V1_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+
+ let VAddrDwords = 2 in {
+ def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>;
+ def _V2_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_64>;
+ def _V2_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 2>;
+ }
+
+ let VAddrDwords = 3 in {
+ def _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96>;
+ def _V3_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_96>;
+ def _V3_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 3>;
+ }
+
+ let VAddrDwords = 4 in {
+ def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>;
+ def _V4_gfx10 : MIMG_NoSampler_gfx10<op, asm, dst_rc, VReg_128>;
+ def _V4_nsa_gfx10 : MIMG_NoSampler_nsa_gfx10<op, asm, dst_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ }
}
multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16, bit mip = 0,
@@ -187,17 +295,7 @@ class MIMG_Store_Helper <bits<7> op, str
RegisterClass data_rc,
RegisterClass addr_rc,
string dns = "">
- : MIMG <(outs), dns>,
- MIMGe<op> {
- let ssamp = 0;
- let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
- let mayLoad = 0;
- let mayStore = 1;
- let hasSideEffects = 0;
- let hasPostISelHook = 0;
- let DisableWQM = 1;
-
+ : MIMG_gfx6789<op, (outs), dns> {
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -206,18 +304,60 @@ class MIMG_Store_Helper <bits<7> op, str
#!if(BaseOpcode.HasD16, "$d16", "");
}
-multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
+class MIMG_Store_gfx10<int op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx10<op, (outs), dns> {
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+ GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Store_nsa_gfx10<int op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx10<op, (outs), num_addrs, dns> {
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+multiclass MIMG_Store_Addr_Helper <int op, string asm,
RegisterClass data_rc,
bit enableDisasm> {
- let VAddrDwords = 1 in
- def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
- !if(enableDisasm, "AMDGPU", "")>;
- let VAddrDwords = 2 in
- def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
- let VAddrDwords = 3 in
- def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
- let VAddrDwords = 4 in
- def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
+ let mayLoad = 0, mayStore = 1, hasSideEffects = 0, hasPostISelHook = 0,
+ DisableWQM = 1, ssamp = 0 in {
+ let VAddrDwords = 1 in {
+ def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ def _V1_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VGPR_32,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ let VAddrDwords = 2 in {
+ def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>;
+ def _V2_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_64>;
+ def _V2_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 2>;
+ }
+ let VAddrDwords = 3 in {
+ def _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96>;
+ def _V3_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_96>;
+ def _V3_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 3>;
+ }
+ let VAddrDwords = 4 in {
+ def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>;
+ def _V4_gfx10 : MIMG_Store_gfx10 <op, asm, data_rc, VReg_128>;
+ def _V4_nsa_gfx10 : MIMG_Store_nsa_gfx10 <op, asm, data_rc, 4,
+ !if(enableDisasm, "AMDGPU", "")>;
+ }
+ }
}
multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
@@ -239,15 +379,9 @@ multiclass MIMG_Store <bits<7> op, strin
}
}
-class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
- RegisterClass addr_rc, string dns="",
- bit enableDasm = 0>
- : MIMG <(outs data_rc:$vdst), !if(enableDasm, dns, "")> {
- let mayLoad = 1;
- let mayStore = 1;
- let hasSideEffects = 1; // FIXME: Remove this
- let hasPostISelHook = 0;
- let DisableWQM = 1;
+class MIMG_Atomic_gfx6789_base <bits<7> op, string asm, RegisterClass data_rc,
+ RegisterClass addr_rc, string dns="">
+ : MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";
@@ -257,39 +391,80 @@ class MIMG_Atomic_Helper <string asm, Re
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
-multiclass MIMG_Atomic_Helper_m <mimg op, string asm, RegisterClass data_rc,
- RegisterClass addr_rc, bit enableDasm = 0> {
- let ssamp = 0, d16 = 0 in {
- def _si : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "GFX6GFX7", enableDasm>,
- SIMCInstr<NAME, SIEncodingFamily.SI>,
- MIMGe<op.SI> {
- let AssemblerPredicates = [isGFX6GFX7];
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "GFX8", enableDasm>,
- SIMCInstr<NAME, SIEncodingFamily.VI>,
- MIMGe<op.VI> {
- let AssemblerPredicates = [isGFX8GFX9];
- let DisableDecoder = DisableVIDecoder;
- let MIMGEncoding = MIMGEncGfx8;
- }
- }
+class MIMG_Atomic_si<mimg op, string asm, RegisterClass data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_gfx6789_base<op.SI_GFX10, asm, data_rc, addr_rc,
+ !if(enableDasm, "GFX6GFX7", "")> {
+ let AssemblerPredicates = [isGFX6GFX7];
+}
+
+class MIMG_Atomic_vi<mimg op, string asm, RegisterClass data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
+ let AssemblerPredicates = [isGFX8GFX9];
+ let MIMGEncoding = MIMGEncGfx8;
+}
+
+class MIMG_Atomic_gfx10<mimg op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ bit enableDisasm = 0>
+ : MIMG_gfx10<!cast<int>(op.SI_GFX10), (outs DataRC:$vdst),
+ !if(enableDisasm, "AMDGPU", "")> {
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+ GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+}
+
+class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ bit enableDisasm = 0>
+ : MIMG_nsa_gfx10<!cast<int>(op.SI_GFX10), (outs DataRC:$vdst), num_addrs,
+ !if(enableDisasm, "AMDGPU", "")> {
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
}
multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
RegisterClass data_rc,
bit enableDasm = 0> {
- // _V* variants have different address size, but the size is not encoded.
- // So only one variant can be disassembled. V1 looks the safest to decode.
- let VAddrDwords = 1 in
- defm _V1 : MIMG_Atomic_Helper_m <op, asm, data_rc, VGPR_32, enableDasm>;
- let VAddrDwords = 2 in
- defm _V2 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_64>;
- let VAddrDwords = 3 in
- defm _V3 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_96>;
- let VAddrDwords = 4 in
- defm _V4 : MIMG_Atomic_Helper_m <op, asm, data_rc, VReg_128>;
+ let hasSideEffects = 1, // FIXME: remove this
+ mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
+ ssamp = 0 in {
+ let VAddrDwords = 1 in {
+ def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ let VAddrDwords = 2 in {
+ def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+ def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
+ def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
+ }
+ let VAddrDwords = 3 in {
+ def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
+ def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
+ def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+ }
+ let VAddrDwords = 4 in {
+ def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
+ def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
+ def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
+ }
+ }
}
multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atomics
@@ -311,10 +486,7 @@ multiclass MIMG_Atomic <mimg op, string
class MIMG_Sampler_Helper <bits<7> op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
- : MIMG <(outs dst_rc:$vdata), dns>,
- MIMGe<op> {
- let d16 = !if(BaseOpcode.HasD16, ?, 0);
-
+ : MIMG_gfx6789 <op, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
@@ -323,6 +495,33 @@ class MIMG_Sampler_Helper <bits<7> op, s
#!if(BaseOpcode.HasD16, "$d16", "");
}
+class MIMG_Sampler_gfx10<int op, string opcode,
+ RegisterClass DataRC, RegisterClass AddrRC,
+ string dns="">
+ : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
+ GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
+ #"$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
+class MIMG_Sampler_nsa_gfx10<int op, string opcode,
+ RegisterClass DataRC, int num_addrs,
+ string dns="">
+ : MIMG_nsa_gfx10<op, (outs DataRC:$vdata), num_addrs, dns> {
+ let InOperandList = !con(AddrIns,
+ (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
+ SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+ !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
+ #"$dlc$glc$slc$r128$tfe$lwe"
+ #!if(BaseOpcode.HasD16, "$d16", "");
+}
+
class MIMGAddrSize<int dw, bit enable_disasm> {
int NumWords = dw;
@@ -339,6 +538,11 @@ class MIMGAddrSize<int dw, bit enable_di
bit Disassemble = enable_disasm;
}
+// Return whether x is in lst.
+class isIntInList<int x, list<int> lst> {
+ bit ret = !foldl(0, lst, lhs, y, !or(lhs, !eq(x, y)));
+}
+
// Return whether a value inside the range [min, max] (endpoints inclusive)
// is in the given list.
class isRangeInList<int min, int max, list<int> lst> {
@@ -374,16 +578,41 @@ class MIMG_Sampler_AddrSizes<AMDGPUSampl
!listconcat(lhs.List, [MIMGAddrSize<dw, !empty(lhs.List)>]),
!if(!eq(dw, 3), 3, !add(dw, 1))>, // we still need _V4 for codegen w/ 3 dwords
lhs)).List;
+
+ // For NSA, generate machine instructions for all possible numbers of words
+ // except 1 (which is already covered by the non-NSA case).
+ // The disassembler defaults to the largest number of arguments among the
+ // variants with the same number of NSA words, and custom code then derives
+ // the exact variant based on the sample variant and the image dimension.
+ list<MIMGAddrSize> NSAInstrs =
+ !foldl([]<MIMGAddrSize>, [[12, 11, 10], [9, 8, 7, 6], [5, 4, 3, 2]], prev, nsa_group,
+ !listconcat(prev,
+ !foldl([]<MIMGAddrSize>, nsa_group, lhs, dw,
+ !if(isIntInList<dw, AllNumAddrWords>.ret,
+ !listconcat(lhs, [MIMGAddrSize<dw, !empty(lhs)>]),
+ lhs))));
}
multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
AMDGPUSampleVariant sample, RegisterClass dst_rc,
bit enableDisasm = 0> {
foreach addr = MIMG_Sampler_AddrSizes<sample>.MachineInstrs in {
- let VAddrDwords = addr.NumWords in
- def _V # addr.NumWords
- : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
- !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ let VAddrDwords = addr.NumWords in {
+ def _V # addr.NumWords
+ : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ def _V # addr.NumWords # _gfx10
+ : MIMG_Sampler_gfx10 <op, asm, dst_rc, addr.RegClass,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ }
+ }
+
+ foreach addr = MIMG_Sampler_AddrSizes<sample>.NSAInstrs in {
+ let VAddrDwords = addr.NumWords in {
+ def _V # addr.NumWords # _nsa_gfx10
+ : MIMG_Sampler_nsa_gfx10<op, asm, dst_rc, addr.NumWords,
+ !if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
+ }
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed May 1 09:32:58 2019
@@ -1003,6 +1003,13 @@ bool SITargetLowering::isLegalFlatAddres
// GFX9 added a 13-bit signed offset. When using regular flat instructions,
// the sign bit is ignored and is treated as a 12-bit unsigned offset.
+ // GFX10 shrank the signed offset to 12 bits. When using regular flat
+ // instructions, the sign bit is also ignored and it is treated as an 11-bit
+ // unsigned offset.
+
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
+ return isUInt<11>(AM.BaseOffs) && AM.Scale == 0;
+
// Just r + i
return isUInt<12>(AM.BaseOffs) && AM.Scale == 0;
}
@@ -2828,8 +2835,9 @@ unsigned SITargetLowering::getRegisterBy
}
- if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
- Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
+ if ((Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) &&
+ Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
}
@@ -4656,7 +4664,7 @@ static SDValue getBuildDwordsVector(Sele
}
static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
- SDValue *GLC, SDValue *SLC) {
+ SDValue *GLC, SDValue *SLC, SDValue *DLC) {
auto CachePolicyConst = cast<ConstantSDNode>(CachePolicy.getNode());
uint64_t Value = CachePolicyConst->getZExtValue();
@@ -4669,6 +4677,10 @@ static bool parseCachePolicy(SDValue Cac
*SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x2;
}
+ if (DLC) {
+ *DLC = DAG.getTargetConstant((Value & 0x4) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x4;
+ }
return Value == 0;
}
@@ -4786,6 +4798,7 @@ SDValue SITargetLowering::lowerImage(SDV
const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
unsigned IntrOpcode = Intr->BaseOpcode;
+ bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
SmallVector<EVT, 3> OrigResultTypes(Op->value_begin(), Op->value_end());
@@ -4924,7 +4937,22 @@ SDValue SITargetLowering::lowerImage(SDV
VAddrs.push_back(Op.getOperand(AddrIdx + i));
}
- SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
+ // If the register allocator cannot place the address registers contiguously
+ // without introducing moves, then using the non-sequential address encoding
+ // is always preferable, since it saves VALU instructions and is usually a
+ // wash in terms of code size or even better.
+ //
+ // However, we currently have no way of hinting to the register allocator that
+ // MIMG addresses should be placed contiguously when it is possible to do so,
+ // so force non-NSA for the common 2-address case as a heuristic.
+ //
+ // SIShrinkInstructions will convert NSA encodings to non-NSA after register
+ // allocation when possible.
+ bool UseNSA =
+ ST->hasFeature(AMDGPU::FeatureNSAEncoding) && VAddrs.size() >= 3;
+ SDValue VAddr;
+ if (!UseNSA)
+ VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
@@ -4987,45 +5015,66 @@ SDValue SITargetLowering::lowerImage(SDV
SDValue GLC;
SDValue SLC;
+ SDValue DLC;
if (BaseOpcode->Atomic) {
GLC = True; // TODO no-return optimization
- if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC))
+ if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC,
+ IsGFX10 ? &DLC : nullptr))
return Op;
} else {
- if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC))
+ if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC,
+ IsGFX10 ? &DLC : nullptr))
return Op;
}
- SmallVector<SDValue, 14> Ops;
+ SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
- Ops.push_back(VAddr);
+ if (UseNSA) {
+ for (const SDValue &Addr : VAddrs)
+ Ops.push_back(Addr);
+ } else {
+ Ops.push_back(VAddr);
+ }
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
if (BaseOpcode->Sampler)
Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
+ if (IsGFX10)
+ Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
+ if (IsGFX10)
+ Ops.push_back(DLC);
Ops.push_back(GLC);
Ops.push_back(SLC);
Ops.push_back(IsA16 && // a16 or r128
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
Ops.push_back(TFE); // tfe
Ops.push_back(LWE); // lwe
- Ops.push_back(DimInfo->DA ? True : False);
+ if (!IsGFX10)
+ Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
if (isa<MemSDNode>(Op))
Ops.push_back(Op.getOperand(0)); // chain
- int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
+ int NumVAddrDwords =
+ UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
- NumVDataDwords, NumVAddrDwords);
- if (Opcode == -1)
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+ if (IsGFX10) {
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
+ UseNSA ? AMDGPU::MIMGEncGfx10NSA
+ : AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, NumVAddrDwords);
+ } else {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
+ NumVDataDwords, NumVAddrDwords);
+ if (Opcode == -1)
+ Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
+ NumVDataDwords, NumVAddrDwords);
+ }
assert(Opcode != -1);
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Wed May 1 09:32:58 2019
@@ -246,38 +246,58 @@ class VINTRPe <bits<2> op> : Enc32 {
let Inst{31-26} = 0x32; // encoding
}
-class MIMGe <bits<7> op> : Enc64 {
+class MIMGe : Enc64 {
bits<8> vdata;
bits<4> dmask;
bits<1> unorm;
bits<1> glc;
- bits<1> da;
bits<1> r128;
bits<1> tfe;
bits<1> lwe;
bits<1> slc;
bit d16;
- bits<8> vaddr;
bits<7> srsrc;
bits<7> ssamp;
let Inst{11-8} = dmask;
let Inst{12} = unorm;
let Inst{13} = glc;
- let Inst{14} = da;
let Inst{15} = r128;
let Inst{16} = tfe;
let Inst{17} = lwe;
- let Inst{24-18} = op;
let Inst{25} = slc;
let Inst{31-26} = 0x3c;
- let Inst{39-32} = vaddr;
let Inst{47-40} = vdata;
let Inst{52-48} = srsrc{6-2};
let Inst{57-53} = ssamp{6-2};
let Inst{63} = d16;
}
+class MIMGe_gfx6789 <bits<7> op> : MIMGe {
+ bits<8> vaddr;
+ bits<1> da;
+
+ let Inst{14} = da;
+ let Inst{24-18} = op;
+ let Inst{39-32} = vaddr;
+}
+
+class MIMGe_gfx10 <bits<8> op> : MIMGe {
+ bits<8> vaddr0;
+ bits<3> dim;
+ bits<2> nsa;
+ bits<1> dlc;
+ bits<1> a16 = 0; // TODO: this should be an operand
+
+ let Inst{0} = op{7};
+ let Inst{2-1} = nsa;
+ let Inst{5-3} = dim;
+ let Inst{7} = dlc;
+ let Inst{24-18} = op{6-0};
+ let Inst{39-32} = vaddr0;
+ let Inst{62} = a16;
+}
+
class EXPe : Enc64 {
bits<4> en;
bits<6> tgt;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed May 1 09:32:58 2019
@@ -3223,6 +3223,53 @@ bool SIInstrInfo::verifyInstruction(cons
}
}
+ if (isMIMG(MI)) {
+ const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
+ if (DimOp) {
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
+ AMDGPU::OpName::vaddr0);
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opcode);
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ const AMDGPU::MIMGDimInfo *Dim =
+ AMDGPU::getMIMGDimInfoByEncoding(DimOp->getImm());
+
+ if (!Dim) {
+ ErrInfo = "dim is out of range";
+ return false;
+ }
+
+ bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
+ unsigned AddrWords = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+ unsigned VAddrWords;
+ if (IsNSA) {
+ VAddrWords = SRsrcIdx - VAddr0Idx;
+ } else {
+ const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
+ VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
+ if (AddrWords > 8)
+ AddrWords = 16;
+ else if (AddrWords > 4)
+ AddrWords = 8;
+ else if (AddrWords == 3 && VAddrWords == 4) {
+ // CodeGen uses the V4 variant of instructions for three addresses,
+ // because the selection DAG does not support non-power-of-two types.
+ AddrWords = 4;
+ }
+ }
+
+ if (VAddrWords != AddrWords) {
+ ErrInfo = "bad vaddr size";
+ return false;
+ }
+ }
+ }
+
const MachineOperand *DppCt = getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl);
if (DppCt) {
using namespace AMDGPU::DPP;
@@ -5356,25 +5403,35 @@ unsigned SIInstrInfo::getInstSizeInBytes
return DescSize; // No operands.
if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return DescSize + 4;
+ return isVOP3(MI) ? 12 : (DescSize + 4);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return DescSize + 4;
+ return isVOP3(MI) ? 12 : (DescSize + 4);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx == -1)
return DescSize;
if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
- return DescSize + 4;
+ return isVOP3(MI) ? 12 : (DescSize + 4);
return DescSize;
}
+ // Check whether we have extra NSA words.
+ if (isMIMG(MI)) {
+ int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
+ if (VAddr0Idx < 0)
+ return 8;
+
+ int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
+ }
+
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed May 1 09:32:58 2019
@@ -845,6 +845,7 @@ def exp_vm : NamedOperandBit<"ExpVM", Na
def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
+def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Wed May 1 09:32:58 2019
@@ -38,6 +38,8 @@ class SIShrinkInstructions : public Mach
public:
static char ID;
+ void shrinkMIMG(MachineInstr &MI);
+
public:
SIShrinkInstructions() : MachineFunctionPass(ID) {
}
@@ -211,6 +213,96 @@ static void shrinkScalarCompare(const SI
}
}
+// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding. MI is left unmodified on every early return.
+void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) {
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ if (Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
+ return; // Only instructions currently using the gfx10 NSA encoding are candidates.
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ unsigned NewAddrDwords = Info->VAddrDwords; // Size of the single address tuple after shrinking.
+ const TargetRegisterClass *RC; // Register class of that tuple, chosen from the dword count below.
+
+ if (Info->VAddrDwords == 2) {
+ RC = &AMDGPU::VReg_64RegClass;
+ } else if (Info->VAddrDwords == 3) {
+ RC = &AMDGPU::VReg_96RegClass;
+ } else if (Info->VAddrDwords == 4) {
+ RC = &AMDGPU::VReg_128RegClass;
+ } else if (Info->VAddrDwords <= 8) {
+ RC = &AMDGPU::VReg_256RegClass;
+ NewAddrDwords = 8; // Counts not matched above (up to 8) are rounded up to an 8-dword tuple.
+ } else {
+ RC = &AMDGPU::VReg_512RegClass;
+ NewAddrDwords = 16; // Anything larger is rounded up to a 16-dword tuple.
+ }
+
+ unsigned VgprBase = 0;
+ bool IsUndef = true; // Stays true only if every address operand is undef.
+ bool IsKill = NewAddrDwords == Info->VAddrDwords; // Never set kill when the tuple was rounded up; otherwise all operands must be kill.
+ for (unsigned i = 0; i < Info->VAddrDwords; ++i) {
+ const MachineOperand &Op = MI.getOperand(VAddr0Idx + i);
+ unsigned Vgpr = TRI.getHWRegIndex(Op.getReg());
+
+ if (i == 0) {
+ VgprBase = Vgpr;
+ } else if (VgprBase + i != Vgpr)
+ return; // Address registers are not consecutive, so keep the NSA form.
+
+ if (!Op.isUndef())
+ IsUndef = false;
+ if (!Op.isKill())
+ IsKill = false;
+ }
+
+ if (VgprBase + NewAddrDwords > 256)
+ return; // The (possibly rounded-up) tuple would extend past hardware register index 255.
+
+ // Further check for implicit tied operands - one may be present if TFE or
+ // LWE is enabled.
+ int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
+ int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
+ unsigned TFEVal = MI.getOperand(TFEIdx).getImm();
+ unsigned LWEVal = MI.getOperand(LWEIdx).getImm();
+ int ToUntie = -1; // Index of the implicit tied operand, or -1 if none was found.
+ if (TFEVal || LWEVal) {
+ // TFE/LWE is enabled so we need to deal with an implicit tied operand.
+ for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
+ MI.getOperand(i).isImplicit()) {
+ // This is the tied operand
+ assert(
+ ToUntie == -1 &&
+ "found more than one tied implicit operand when expecting only 1");
+ ToUntie = i;
+ MI.untieRegOperand(ToUntie); // Re-tied at the end; presumably required before operands are removed below - TODO confirm.
+ }
+ }
+ }
+
+ unsigned NewOpcode =
+ AMDGPU::getMIMGOpcode(Info->BaseOpcode, AMDGPU::MIMGEncGfx10Default,
+ Info->VDataDwords, NewAddrDwords);
+ MI.setDesc(TII->get(NewOpcode));
+ MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase)); // NOTE(review): assumes RC's register N starts at hardware VGPR N - confirm.
+ MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
+ MI.getOperand(VAddr0Idx).setIsKill(IsKill);
+
+ for (unsigned i = 1; i < Info->VAddrDwords; ++i)
+ MI.RemoveOperand(VAddr0Idx + 1); // Same index each time: removes the VAddrDwords - 1 operands that followed vaddr0.
+
+ if (ToUntie >= 0) {
+ MI.tieOperands(
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
+ ToUntie - (Info->VAddrDwords - 1)); // Old index shifted down by the number of removed address operands.
+ }
+}
+
/// Attempt to shink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
@@ -597,6 +689,14 @@ bool SIShrinkInstructions::runOnMachineF
continue;
}
+ if (TII->isMIMG(MI.getOpcode()) &&
+ ST.getGeneration() >= AMDGPUSubtarget::GFX10 &&
+ MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs)) {
+ shrinkMIMG(MI);
+ continue;
+ }
+
if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
continue;
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed May 1 09:32:58 2019
@@ -200,6 +200,8 @@ struct MIMGDimInfo {
uint8_t NumCoords;
uint8_t NumGradients;
bool DA;
+ uint8_t Encoding;
+ const char *AsmSuffix;
};
LLVM_READONLY
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll Wed May 1 09:32:58 2019
@@ -1,8 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}atomic_swap_1d:
-; GCN: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -11,7 +13,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_1d:
-; GCN: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -20,7 +23,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_sub_1d:
-; GCN: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -29,7 +33,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_smin_1d:
-; GCN: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -38,7 +43,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_umin_1d:
-; GCN: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -47,7 +53,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_smax_1d:
-; GCN: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -56,7 +63,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_umax_1d:
-; GCN: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -65,7 +73,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_and_1d:
-; GCN: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -74,7 +83,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_or_1d:
-; GCN: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -83,7 +93,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_xor_1d:
-; GCN: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -92,7 +103,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_inc_1d:
-; GCN: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -101,7 +113,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_dec_1d:
-; GCN: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -110,7 +123,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_cmpswap_1d:
-; GCN: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}}
+; GFX6789: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc{{$}}
+; GFX10: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc ;
define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -119,7 +133,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_2d:
-; GCN: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc ;
define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -128,7 +143,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_3d:
-; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ;
define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
@@ -137,7 +153,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_cube:
-; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ;
define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
@@ -146,7 +163,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_1darray:
-; GCN: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX6789: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX10: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ;
define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
@@ -155,7 +173,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_2darray:
-; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc ;
define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
@@ -164,7 +183,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_2dmsaa:
-; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc{{$}}
+; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ;
define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -173,7 +193,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_2darraymsaa:
-; GCN: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX6789: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 unorm glc da{{$}}
+; GFX10: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc ;
define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
@@ -182,7 +203,8 @@ main_body:
}
; GCN-LABEL: {{^}}atomic_add_1d_slc:
-; GCN: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}}
+; GFX6789: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc{{$}}
+; GFX10: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc ;
define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll Wed May 1 09:32:58 2019
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}image_load_f16:
-; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
+; GFX89: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
+; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
%tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -13,6 +15,7 @@ main_body:
; GCN-LABEL: {{^}}image_load_v2f16:
; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
+; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
%tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -23,6 +26,7 @@ main_body:
; GCN-LABEL: {{^}}image_load_v4f16:
; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
%tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -33,6 +37,7 @@ main_body:
; GCN-LABEL: {{^}}image_load_mip_v4f16:
; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
+; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
%tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
@@ -43,6 +48,7 @@ main_body:
; GCN-LABEL: {{^}}image_load_3d_v2f16:
; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
+; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
%tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
@@ -51,7 +57,8 @@ main_body:
}
; GCN-LABEL: {{^}}image_store_f16
-; GCN: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
+; GFX89: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
+; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
main_body:
call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -63,6 +70,7 @@ main_body:
; UNPACKED: v_and_b32_e32
; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
+; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) {
main_body:
%data = bitcast float %in to <2 x half>
@@ -77,6 +85,7 @@ main_body:
; UNPACKED: v_and_b32_e32
; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
main_body:
%data = bitcast <2 x float> %in to <4 x half>
@@ -91,6 +100,7 @@ main_body:
; UNPACKED: v_and_b32_e32
; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
+; GFX10: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16{{$}}
define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
main_body:
%data = bitcast <2 x float> %in to <4 x half>
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll Wed May 1 09:32:58 2019
@@ -1,10 +1,12 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
; UNPACKED: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}}
; PACKED: image_gather4_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+; GFX10: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D d16{{$}}
define amdgpu_ps <2 x float> @image_gather4_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
main_body:
%tex = call <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32 4, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll Wed May 1 09:32:58 2019
@@ -1,8 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}gather4_2d:
-; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -10,7 +12,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_cube:
-; GCN: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ;
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -18,7 +21,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_2darray:
-; GCN: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ;
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -26,7 +30,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_c_2d:
-; GCN: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -34,7 +39,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_cl_2d:
-; GCN: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -42,7 +48,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_c_cl_2d:
-; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -50,7 +57,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_b_2d:
-; GCN: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -58,7 +66,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_c_b_2d:
-; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -66,7 +75,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_b_cl_2d:
-; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -74,7 +84,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
-; GCN: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -82,7 +93,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_l_2d:
-; GCN: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -90,7 +102,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_c_l_2d:
-; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -98,7 +111,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_lz_2d:
-; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -106,7 +120,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_c_lz_2d:
-; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX10: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -114,7 +129,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_2d_dmask_2:
-; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2{{$}}
+; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -122,7 +138,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_2d_dmask_4:
-; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4{{$}}
+; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -130,7 +147,8 @@ main_body:
}
; GCN-LABEL: {{^}}gather4_2d_dmask_8:
-; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8{{$}}
+; GFX10: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll Wed May 1 09:32:58 2019
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GCN,PRE-GFX10 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefixes=GCN,PRE-GFX10 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck --check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}getlod_1d:
-; GCN: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; PRE-GFX10: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_get_lod v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @getlod_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
@@ -11,7 +13,8 @@ main_body:
}
; GCN-LABEL: {{^}}getlod_2d:
-; GCN: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3{{$}}
+; PRE-GFX10: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3{{$}}
+; GFX10: image_get_lod v[0:1], v[0:1], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_2D
; GCN: s_waitcnt vmcnt(0)
define amdgpu_ps <2 x float> @getlod_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll?rev=359698&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll Wed May 1 09:32:58 2019
@@ -0,0 +1,91 @@
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-nsa-encoding -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NONSA %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,NSA %s
+
+; GCN-LABEL: {{^}}sample_2d:
+;
+; TODO: use NSA here
+; GCN: v_mov_b32_e32 v2, v0
+;
+; GCN: image_sample v[0:3], v[1:2],
+define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %t, float %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_3d:
+; NONSA: v_mov_b32_e32 v3, v0
+; NONSA: image_sample v[0:3], v[1:4],
+; NSA: image_sample v[0:3], [v1, v2, v0],
+define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_3d:
+; NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1],
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_contig_nsa:
+; GCN: image_sample_c_l v0, v[0:7],
+; NSA: image_sample v1, [v6, v7, v5],
+define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
+main_body:
+ %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %r.0 = insertelement <2 x float> undef, float %v1, i32 0
+ %r = insertelement <2 x float> %r.0, float %v2, i32 1
+ ret <2 x float> %r
+}
+
+; GCN-LABEL: {{^}}sample_nsa_nsa:
+; NSA: image_sample_c_l v0, [v1, v2, v3, v4, v0],
+; NSA: image_sample v1, [v6, v7, v5],
+define amdgpu_ps <2 x float> @sample_nsa_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %r2, float %s2, float %t2) {
+main_body:
+ %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %r.0 = insertelement <2 x float> undef, float %v1, i32 0
+ %r = insertelement <2 x float> %r.0, float %v2, i32 1
+ ret <2 x float> %r
+}
+
+; GCN-LABEL: {{^}}sample_nsa_contig:
+; NSA: image_sample_c_l v0, [v1, v2, v3, v4, v0],
+; NSA: image_sample v1, v[5:7],
+define amdgpu_ps <2 x float> @sample_nsa_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %lod, float %zcompare, float %s1, float %t1, float %r1, float %s2, float %t2, float %r2) {
+main_body:
+ %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %r.0 = insertelement <2 x float> undef, float %v1, i32 0
+ %r = insertelement <2 x float> %r.0, float %v2, i32 1
+ ret <2 x float> %r
+}
+
+; GCN-LABEL: {{^}}sample_contig_contig:
+; GCN: image_sample_c_l v0, v[0:7],
+; NSA: image_sample v1, v[5:7],
+; NONSA: image_sample v1, v[5:8],
+define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) {
+main_body:
+ %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2, float %t2, float %r2, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ %r.0 = insertelement <2 x float> undef, float %v1, i32 0
+ %r = insertelement <2 x float> %r.0, float %v2, i32 1
+ ret <2 x float> %r
+}
+
+
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #1 = { nounwind readonly }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll Wed May 1 09:32:58 2019
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}image_sample_2d_f16:
-; GCN: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16{{$}}
+; GFX89: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16{{$}}
+; GFX10: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D d16{{$}}
define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%tex = call half @llvm.amdgcn.image.sample.2d.f16.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
@@ -26,6 +28,7 @@ main_body:
; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16:
; UNPACKED: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}}
; PACKED: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}}
+; GFX10: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D d16{{$}}
define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
main_body:
%tex = call <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
@@ -52,6 +55,7 @@ main_body:
; GCN-LABEL: {{^}}image_sample_b_2d_v4f16:
; UNPACKED: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}}
; PACKED: image_sample_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}}
+; GFX10: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D d16{{$}}
define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
main_body:
%tex = call <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll Wed May 1 09:32:58 2019
@@ -1,8 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}sample_1d:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -15,7 +17,8 @@ main_body:
; GCN: v_mov_b32_e32 v2, v0
; GCN: v_mov_b32_e32 v3, v0
; GCN: v_mov_b32_e32 v4, v0
-; GCN: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe{{$}}
+; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe{{$}}
+; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -28,7 +31,8 @@ main_body:
; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_1:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
-; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe{{$}}
+; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe{{$}}
+; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -44,7 +48,8 @@ main_body:
; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_2:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
-; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe{{$}}
+; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe{{$}}
+; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -60,7 +65,8 @@ main_body:
; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_3:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
-; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe{{$}}
+; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe{{$}}
+; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -76,7 +82,8 @@ main_body:
; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_4:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
-; GCN: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe{{$}}
+; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe{{$}}
+; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -93,7 +100,8 @@ main_body:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v2, v0
-; GCN: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe{{$}}
+; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe{{$}}
+; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -112,7 +120,8 @@ main_body:
; GCN: v_mov_b32_e32 v0, 0
; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v2, v0
-; GCN: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe{{$}}
+; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe{{$}}
+; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -132,7 +141,8 @@ main_body:
; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v2, v0
; GCN: v_mov_b32_e32 v3, v0
-; GCN: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe{{$}}
+; GFX6789: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe{{$}}
+; GFX10: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ;
define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -155,7 +165,8 @@ main_body:
; GCN: v_mov_b32_e32 v2, v0
; GCN: v_mov_b32_e32 v3, v0
; GCN: v_mov_b32_e32 v4, v0
-; GCN: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe{{$}}
+; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe{{$}}
+; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ;
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
@@ -166,7 +177,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_2d:
-; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -174,7 +186,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_3d:
-; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ;
define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -182,7 +195,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cube:
-; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
+; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
+; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ;
define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -190,7 +204,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_1darray:
-; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}}
+; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}}
+; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ;
define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -198,7 +213,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_2darray:
-; GCN: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
+; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
+; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ;
define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -206,7 +222,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_1d:
-; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -214,7 +231,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_2d:
-; GCN: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -222,7 +240,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cl_1d:
-; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -230,7 +249,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cl_2d:
-; GCN: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -238,7 +258,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_cl_1d:
-; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -246,7 +267,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_cl_2d:
-; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -254,7 +276,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_b_1d:
-; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -262,7 +285,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_b_2d:
-; GCN: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -270,7 +294,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_b_1d:
-; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -278,7 +303,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_b_2d:
-; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -286,7 +312,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_b_cl_1d:
-; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -294,7 +321,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_b_cl_2d:
-; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -302,7 +330,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_b_cl_1d:
-; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -310,7 +339,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_b_cl_2d:
-; GCN: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -318,7 +348,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_d_1d:
-; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -326,7 +357,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_d_2d:
-; GCN: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -334,7 +366,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_1d:
-; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -342,7 +375,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_2d:
-; GCN: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -350,7 +384,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_d_cl_1d:
-; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -358,7 +393,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_d_cl_2d:
-; GCN: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -366,7 +402,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_cl_1d:
-; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -374,7 +411,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_cl_2d:
-; GCN: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -382,7 +420,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cd_1d:
-; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -390,7 +429,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cd_2d:
-; GCN: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -398,7 +438,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_cd_1d:
-; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -406,7 +447,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_cd_2d:
-; GCN: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -414,7 +456,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cd_cl_1d:
-; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -422,7 +465,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_cd_cl_2d:
-; GCN: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -430,7 +474,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
-; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -438,7 +483,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
-; GCN: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -446,7 +492,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_l_1d:
-; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -454,7 +501,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_l_2d:
-; GCN: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -462,7 +510,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_l_1d:
-; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -470,7 +519,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_l_2d:
-; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -478,7 +528,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_lz_1d:
-; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -486,7 +537,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_lz_2d:
-; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -494,7 +546,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_lz_1d:
-; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -502,7 +555,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_lz_2d:
-; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX6789: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GFX10: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -510,7 +564,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
-; GCN: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}}
+; GFX6789: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}}
+; GFX10: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ;
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
main_body:
%v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -518,7 +573,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1_tfe:
-; GCN: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da{{$}}
+; GFX6789: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da{{$}}
+; GFX10: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ;
define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
main_body:
%v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -529,7 +585,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
-; GCN: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}}
+; GFX6789: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}}
+; GFX10: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ;
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
main_body:
%v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -537,7 +594,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2_tfe:
-; GCN: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da{{$}}
+; GFX6789: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da{{$}}
+; GFX10: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ;
define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
main_body:
%v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
@@ -553,7 +611,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_1d_unorm:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}}
+; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}}
+; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
@@ -561,7 +620,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_1d_glc:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}}
+; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}}
+; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ;
define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
@@ -569,7 +629,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_1d_slc:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}}
+; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}}
+; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ;
define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
@@ -577,7 +638,8 @@ main_body:
}
; GCN-LABEL: {{^}}sample_1d_glc_slc:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}}
+; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}}
+; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ;
define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
@@ -585,7 +647,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_0:
-; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}}
+; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -594,7 +656,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_01
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}}
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -603,7 +665,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_012
-; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}}
+; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -612,7 +674,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_12
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -621,7 +683,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_03
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}}
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -630,7 +692,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_13
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -639,7 +701,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_123
-; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}}
+; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -656,7 +718,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -665,7 +727,7 @@ main_body:
}
; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
+; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll Wed May 1 09:32:58 2019
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
; GCN-LABEL: {{^}}tbuffer_load_d16_x:
-; GCN: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; GFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], format:22, 0
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
main_body:
%data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
@@ -11,10 +13,11 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xy:
-; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; GFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], format:22, 0
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]]
define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
main_body:
@@ -24,10 +27,12 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
-; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; GFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], format:22, 0
; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0
+; GFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], format:22, 0
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll Wed May 1 09:32:58 2019
@@ -1,13 +1,15 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
; GCN-LABEL: {{^}}tbuffer_store_d16_x:
-; GCN: s_load_dwordx4
-; GCN: s_load_dword s[[S_LO:[0-9]+]]
-; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]]
-; GCN: tbuffer_store_format_d16_x v[[V_LO]], off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; GCN-DAG: s_load_dwordx4
+; GCN-DAG: s_load_dword s[[S_LO:[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]]
+; PREGFX10: tbuffer_store_format_d16_x v[[V_LO]], off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; GFX10: tbuffer_store_format_d16_x v[[V_LO]], off, s[{{[0-9]+:[0-9]+}}], format:33, 0
define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data) {
main_body:
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
@@ -15,14 +17,15 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xy:
-; GCN: s_load_dword [[S_DATA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN: s_load_dword [[S_DATA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}},
; UNPACKED-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[S_DATA]], 16
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
-; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; PREGFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; GFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], format:33, 0
define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data) {
main_body:
call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
@@ -30,7 +33,7 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_store_d16_xyzw:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x10
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[S_DATA_0:[0-9]+]]:[[S_DATA_1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}},
; UNPACKED-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; UNPACKED-DAG: s_lshr_b32 [[SHR0:s[0-9]+]], s[[S_DATA_0]], 16
@@ -40,12 +43,13 @@ main_body:
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0
+; GFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, off, s[{{[0-9]+:[0-9]+}}], format:33, 0
define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data) {
main_body:
call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll Wed May 1 09:32:58 2019
@@ -1,11 +1,16 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE,PREGFX10 %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s
+;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}tbuffer_store:
-; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0
-; GCN: tbuffer_store_format_xyzw v[4:7], off, s[0:3], dfmt:13, nfmt:3, 0 glc
-; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 slc
-; GCN: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:12, nfmt:2, 0
+; PREGFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], dfmt:13, nfmt:3, 0 glc
+; PREGFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 slc
+; PREGFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], dfmt:14, nfmt:4, 0 glc
+; GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0
+; GFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc
+; GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc
+; GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 glc dlc
define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
%in1 = bitcast <4 x float> %1 to <4 x i32>
@@ -14,12 +19,13 @@ main_body:
call void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 44, i32 0)
call void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 61, i32 1)
call void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 78, i32 2)
- call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 78, i32 0)
+ call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 78, i32 5)
ret void
}
; GCN-LABEL: {{^}}tbuffer_store_immoffs:
-; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, 0 offset:42
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, 0 offset:42
+; GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42
define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
%in1 = bitcast <4 x float> %1 to <4 x i32>
@@ -28,7 +34,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs:
-; GCN: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} offset:42
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} offset:42
+; GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, {{s[0-9]+}} offset:42
define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -37,7 +44,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_ofs:
-; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:3, nfmt:7, 0 offen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:3, nfmt:7, 0 offen
+; GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -46,7 +54,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_x1:
-; GCN: tbuffer_store_format_x v0, off, s[0:3], dfmt:13, nfmt:7, 0
+; PREGFX10: tbuffer_store_format_x v0, off, s[0:3], dfmt:13, nfmt:7, 0
+; GFX10: tbuffer_store_format_x v0, off, s[0:3], format:125, 0
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data) {
main_body:
%data.i = bitcast float %data to i32
@@ -55,7 +64,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_x2:
-; GCN: tbuffer_store_format_xy v[0:1], off, s[0:3], dfmt:1, nfmt:2, 0
+; PREGFX10: tbuffer_store_format_xy v[0:1], off, s[0:3], dfmt:1, nfmt:2, 0
+; GFX10: tbuffer_store_format_xy v[0:1], off, s[0:3], format:33, 0
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data) {
main_body:
%data.i = bitcast <2 x float> %data to <2 x i32>
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll Wed May 1 09:32:58 2019
@@ -1,10 +1,12 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
; GCN-LABEL: {{^}}tbuffer_load_d16_x:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; GFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], format:22, 0 idxen
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
main_body:
%data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
@@ -13,10 +15,11 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_load_d16_xy:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
-; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; GFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], format:22, 0 idxen
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]]
define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
main_body:
@@ -27,10 +30,11 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
-; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
-; PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], dfmt:6, nfmt:1, 0 idxen
+; GFX10-PACKED: tbuffer_load_format_d16_xyzw v{{\[}}{{[0-9]+}}:[[HI:[0-9]+]]{{\]}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], format:22, 0 idxen
; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
main_body:
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll Wed May 1 09:32:58 2019
@@ -1,19 +1,24 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s
+;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}tbuffer_load:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:3, 0 idxen glc
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen slc
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:3, 0 idxen glc
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen slc
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:6, nfmt:1, 0 idxen glc
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:63, 0 idxen glc
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:22, 0 idxen slc
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:22, 0 idxen glc dlc
; GCN: s_waitcnt
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
main_body:
%vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0)
%vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 63, i32 1)
%vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 2)
- %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 0)
+ %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 5)
%vdata.f = bitcast <4 x i32> %vdata to <4 x float>
%vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
%vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
@@ -26,7 +31,8 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_load_immoffs:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:42
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:42
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offset:42
define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
main_body:
%vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 78, i32 0)
@@ -36,9 +42,12 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:2, 61 idxen offset:4095
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:3, {{s[0-9]+}} idxen offset:73
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, {{s[0-9]+}} idxen offset:1
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:15, nfmt:2, 61 idxen offset:4095
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:3, {{s[0-9]+}} idxen offset:73
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, {{s[0-9]+}} idxen offset:1
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:47, 61 idxen offset:4095
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:62, {{s[0-9]+}} idxen offset:73
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, format:77, {{s[0-9]+}} idxen offset:1
; GCN: s_waitcnt
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
%vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 4095, i32 61, i32 47, i32 0)
@@ -54,7 +63,8 @@ define amdgpu_vs {<4 x float>, <4 x floa
}
; GCN-LABEL: {{^}}tbuffer_load_idx:
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen
define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) {
main_body:
%vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 78, i32 0)
@@ -63,7 +73,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_ofs:
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offen
define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
main_body:
%vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 78, i32 0)
@@ -72,7 +83,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_ofs_imm:
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen offset:52
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen offset:52
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offen offset:52
define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
main_body:
%ofs = add i32 %voffs, 52
@@ -82,7 +94,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_both:
-; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen
+; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offen
+; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, format:78, 0 idxen offen
define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) {
main_body:
%vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 78, i32 0)
@@ -92,7 +105,8 @@ main_body:
; GCN-LABEL: {{^}}buffer_load_xy:
-; GCN: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen
+; PREGFX10: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen
+; GFX10: tbuffer_load_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, format:77, 0 idxen
define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
%vdata = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
%vdata.f = bitcast <2 x i32> %vdata to <2 x float>
@@ -100,7 +114,8 @@ define amdgpu_vs <2 x float> @buffer_loa
}
; GCN-LABEL: {{^}}buffer_load_x:
-; GCN: tbuffer_load_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen
+; PREGFX10: tbuffer_load_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 idxen
+; GFX10: tbuffer_load_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, format:77, 0 idxen
define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
%vdata = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
%vdata.f = bitcast i32 %vdata to float
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll Wed May 1 09:32:58 2019
@@ -1,13 +1,15 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX81 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PREGFX10,UNPACKED,PREGFX10-UNPACKED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PREGFX10,PACKED,GFX81,PREGFX10-PACKED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PREGFX10,PACKED,GFX9,PREGFX10-PACKED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,PACKED,GFX10-PACKED %s
; GCN-LABEL: {{^}}tbuffer_store_d16_x:
-; GCN: s_load_dwordx4
-; GCN: s_load_dword{{[x0-9]*}} s{{\[}}[[S_LO:[0-9]+]]
-; GCN: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]]
-; GCN: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; GCN-DAG: s_load_dwordx4
+; GCN-DAG: s_load_dword{{[x0-2]*}} s{{\[}}[[S_LO:[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[S_LO]]
+; PREGFX10: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; GFX10: tbuffer_store_format_d16_x v[[V_LO]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], format:33, 0 idxen
define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) {
main_body:
call void @llvm.amdgcn.struct.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
@@ -20,9 +22,10 @@ main_body:
; UNPACKED-DAG: s_and_b32 [[MASKED:s[0-9]+]], [[S_DATA]], 0xffff{{$}}
; UNPACKED-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[MASKED]]
; UNPACKED-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], [[SHR]]
-; UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xy v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
-; PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; PREGFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; GFX10-PACKED: tbuffer_store_format_d16_xy v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], format:33, 0 idxen
define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) {
main_body:
call void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
@@ -40,12 +43,12 @@ main_body:
; UNPACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[MASKED0]]
; UNPACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHR1]]
-; UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
-
+; PREGFX10-UNPACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
; PACKED-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s[[S_DATA_0]]
; PACKED-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s[[S_DATA_1]]
-; PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; PREGFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], dfmt:1, nfmt:2, 0 idxen
+; GFX10-PACKED: tbuffer_store_format_d16_xyzw v{{\[}}[[LO]]:[[HI]]{{\]}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], format:33, 0 idxen
define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
main_body:
call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll?rev=359698&r1=359697&r2=359698&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll Wed May 1 09:32:58 2019
@@ -1,12 +1,17 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=GCN,VERDE,PREGFX10 %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,PREGFX10 %s
+;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}tbuffer_store:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:12, nfmt:2, 0 idxen
-; GCN: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], dfmt:13, nfmt:3, 0 idxen glc
-; GCN: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen slc
-; GCN: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:12, nfmt:2, 0 idxen
+; PREGFX10: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], dfmt:13, nfmt:3, 0 idxen glc
+; PREGFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen slc
+; PREGFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], dfmt:14, nfmt:4, 0 idxen glc
+; GFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], format:44, 0 idxen
+; GFX10: tbuffer_store_format_xyzw v[4:7], [[ZEROREG]], s[0:3], format:61, 0 idxen glc
+; GFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], format:78, 0 idxen slc
+; GFX10: tbuffer_store_format_xyzw v[8:11], [[ZEROREG]], s[0:3], format:78, 0 idxen glc dlc
define amdgpu_ps void @tbuffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
main_body:
%in1 = bitcast <4 x float> %1 to <4 x i32>
@@ -15,13 +20,14 @@ main_body:
call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 44, i32 0)
call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 61, i32 1)
call void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32> %in3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 2)
- call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0)
+ call void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float> %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 5)
ret void
}
; GCN-LABEL: {{^}}tbuffer_store_immoffs:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, 0 idxen offset:42
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, 0 idxen offset:42
+; GFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], format:117, 0 idxen offset:42
define amdgpu_ps void @tbuffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
main_body:
%in1 = bitcast <4 x float> %1 to <4 x i32>
@@ -31,7 +37,8 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_store_scalar_and_imm_offs:
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
-; GCN: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} idxen offset:42
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], dfmt:5, nfmt:7, {{s[0-9]+}} idxen offset:42
+; GFX10: tbuffer_store_format_xyzw v[0:3], [[ZEROREG]], s[0:3], format:117, {{s[0-9]+}} idxen offset:42
define amdgpu_ps void @tbuffer_store_scalar_and_imm_offs(<4 x i32> inreg, <4 x float> %vdata, i32 inreg %soffset) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -40,7 +47,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_idx:
-; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:2, 0 idxen
+; GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen
define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -49,7 +57,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_ofs:
-; GCN: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], dfmt:3, nfmt:7, 0 idxen offen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], dfmt:3, nfmt:7, 0 idxen offen
+; GFX10: tbuffer_store_format_xyzw v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], format:115, 0 idxen offen
define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float> %vdata, i32 %voffset) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -58,7 +67,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_both:
-; GCN: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], dfmt:6, nfmt:4, 0 idxen offen
+; GFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen
define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex, i32 %voffset) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -69,11 +79,13 @@ main_body:
; Ideally, the register allocator would avoid the wait here
;
; GCN-LABEL: {{^}}buffer_store_wait:
-; GCN: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], dfmt:15, nfmt:3, 0 idxen
+; GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen
; VERDE: s_waitcnt expcnt(0)
; GCN: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
; GCN: s_waitcnt vmcnt(0)
-; GCN: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:14, nfmt:2, 0 idxen
+; PREGFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], dfmt:14, nfmt:2, 0 idxen
+; GFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen
define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float> %vdata, i32 %vindex.1, i32 %vindex.2, i32 %vindex.3) {
main_body:
%in1 = bitcast <4 x float> %vdata to <4 x i32>
@@ -85,7 +97,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_x1:
-; GCN: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen
+; PREGFX10: tbuffer_store_format_x v0, v1, s[0:3], dfmt:13, nfmt:7, 0 idxen
+; GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen
define amdgpu_ps void @buffer_store_x1(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
main_body:
%data.i = bitcast float %data to i32
@@ -94,7 +107,8 @@ main_body:
}
; GCN-LABEL: {{^}}buffer_store_x2:
-; GCN: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen
+; PREGFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], dfmt:1, nfmt:2, 0 idxen
+; GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen
define amdgpu_ps void @buffer_store_x2(<4 x i32> inreg %rsrc, <2 x float> %data, i32 %vindex) {
main_body:
%data.i = bitcast <2 x float> %data to <2 x i32>
Added: llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg.s?rev=359698&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg.s (added)
+++ llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg.s Wed May 1 09:32:58 2019
@@ -0,0 +1,380 @@
+; RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s
+
+image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+
+image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX10: image_load v[1:4], [v2, v3], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x0a,0x1f,0x00,0xf0,0x02,0x01,0x01,0x00,0x03,0x00,0x00,0x00]
+
+image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm
+; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x12,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00]
+
+image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm
+; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x1a,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00]
+
+image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm
+; GFX10: image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x22,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x00,0x00,0x00]
+
+image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm
+; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x2a,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00]
+
+image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm
+; GFX10: image_load v[0:3], [v4, v5, v6], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x32,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x00,0x00]
+
+image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
+; GFX10: image_load v[0:3], [v4, v5, v6, v7], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x3a,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00,0x05,0x06,0x07,0x00]
+
+image_load v[0:1], v0, s[0:7] dmask:0x9 dim:1D
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x00,0xf0,0x00,0x00,0x00,0x00]
+
+image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc
+; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D dlc ; encoding: [0x80,0x01,0x00,0xf0,0x00,0x00,0x00,0x00]
+
+image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; GFX10: image_load v255, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x00,0xf0,0x00,0xff,0x00,0x00]
+
+image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc
+; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x01,0x00,0xf2,0x00,0x00,0x00,0x00]
+
+image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16
+; GFX10: image_load v0, v255, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x06,0x00,0xf0,0xff,0x00,0x00,0x80]
+
+// FIXME: This test is incorrect because r128 assumes a 128-bit SRSRC.
+image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128
+; GFX10: image_load v0, v255, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D r128 ; encoding: [0x00,0x81,0x00,0xf0,0xff,0x00,0x00,0x00]
+
+image_load v0, v[2:3], s[0:7] dmask:0x1 dim:2D
+; GFX10: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load v0, v[2:4], s[0:7] dmask:0x1 dim:3D
+; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load v0, v[2:4], s[0:7] dmask:0x1 dim:CUBE
+; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load v0, v[2:3], s[0:7] dmask:0x1 dim:1D_ARRAY
+; GFX10: image_load v0, v[2:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x20,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load v0, v[2:4], s[0:7] dmask:0x1 dim:2D_ARRAY
+; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load v0, v[2:4], s[0:7] dmask:0x1 dim:2D_MSAA
+; GFX10: image_load v0, v[2:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA ; encoding: [0x30,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load v0, v[2:5], s[0:7] dmask:0x1 dim:2D_MSAA_ARRAY
+; GFX10: image_load v0, v[2:5], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x38,0x01,0x00,0xf0,0x02,0x00,0x00,0x00]
+
+image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX10: image_load_mip v[252:255], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x04,0xf0,0x00,0xfc,0x00,0x00]
+
+image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D
+; GFX10: image_load_mip v[253:255], [v255, v254], s[0:7] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0e,0x04,0xf0,0xff,0xfd,0x00,0x00,0xfe,0x00,0x00,0x00]
+
+image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D
+; GFX10: image_load_mip v[254:255], [v254, v255, v253], s[0:7] dmask:0xc dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0c,0x04,0xf0,0xfe,0xfe,0x00,0x00,0xff,0xfd,0x00,0x00]
+
+image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D
+; GFX10: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00]
+
+image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE
+; GFX10: image_load_mip v255, [v254, v255, v253, v252], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xfc,0x00]
+
+image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_load_mip v255, [v254, v255, v253], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0x00,0x00]
+
+image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10: image_load_mip v255, [v254, v255, v253, v255], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x08,0x04,0xf0,0xfe,0xff,0x00,0x00,0xff,0xfd,0xff,0x00]
+
+image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+; GFX10: image_store v[0:3], [v254, v255, v253, v255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x3a,0x0f,0x20,0xf0,0xfe,0x00,0x18,0x00,0xff,0xfd,0xff,0x00]
+
+image_store v[0:3], v[254:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10: image_store v[0:3], v[254:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x20,0xf0,0xfe,0x00,0x18,0x00]
+
+image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX10: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x24,0xf0,0xfd,0x00,0x18,0x00]
+
+image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D
+; GFX10: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x38,0xf0,0x20,0x04,0x18,0x00]
+
+image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc
+; GFX10: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x10,0x21,0x3c,0xf0,0x20,0x04,0x18,0x00]
+
+image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc
+; GFX10: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x12,0x23,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
+
+image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc
+; GFX10: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x1a,0x23,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
+
+image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc
+; GFX10: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x22,0x21,0x48,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]
+
+image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc
+; GFX10: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x2a,0x21,0x50,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
+
+image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc
+; GFX10: image_atomic_umin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA glc ; encoding: [0x32,0x21,0x54,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
+
+image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc
+; GFX10: image_atomic_smax v4, [v32, v1, v2, v3], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY glc ; encoding: [0x3a,0x21,0x58,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x03,0x00]
+
+image_atomic_umax v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc
+; GFX10: image_atomic_umax v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D glc ; encoding: [0x0a,0x21,0x5c,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]
+
+image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; GFX10: image_atomic_and v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x60,0xf0,0x20,0x04,0x18,0x00]
+
+image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; GFX10: image_atomic_or v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x64,0xf0,0x20,0x04,0x18,0x00]
+
+image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; GFX10: image_atomic_xor v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x68,0xf0,0x20,0x04,0x18,0x00]
+
+image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; GFX10: image_atomic_inc v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x6c,0xf0,0x20,0x04,0x18,0x00]
+
+image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; GFX10: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x21,0x70,0xf0,0x20,0x04,0x18,0x00]
+
+;image_atomic_fcmpswap v[4:5], v32, s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_1D glc
+; TODO-GFX10: ; encoding: [0x00,0x23,0x74,0xf0,0x20,0x04,0x18,0x00]
+
+;image_atomic_fmin v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; TODO-GFX10: ; encoding: [0x00,0x21,0x78,0xf0,0x20,0x04,0x18,0x00]
+
+;image_atomic_fmax v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc
+; TODO-GFX10: ; encoding: [0x00,0x21,0x7c,0xf0,0x20,0x04,0x18,0x00]
+
+image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
+; GFX10: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x80,0xf0,0x20,0x40,0x21,0x03]
+
+image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
+; GFX10: image_sample_cl v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x00,0x00]
+
+image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
+; GFX10: image_sample_cl v[64:66], [v32, v16, v15], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x0f,0x00,0x00]
+
+image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x0f,0x14,0x00]
+
+image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE
+; GFX10: image_sample_cl v[64:66], [v32, v16, v15, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x0f,0x14,0x00]
+
+image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_sample_cl v[64:66], [v32, v16, v20], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x14,0x00,0x00]
+
+image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10: image_sample_cl v[64:66], [v32, v16, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x07,0x84,0xf0,0x20,0x40,0x21,0x03,0x10,0x14,0x15,0x00]
+
+image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
+; GFX10: image_sample_d v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x00,0x00]
+
+image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
+; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x00,0x00]
+
+image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D
+; GFX10: image_sample_d v[64:66], v[32:39], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]
+
+image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15]
+
+image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_d v[64:66], v[32:47], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x07,0x88,0xf0,0x20,0x40,0x21,0x03]
+
+image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE
+; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x05,0x00,0x00]
+
+image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00]
+
+image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10: image_sample_d v[64:66], [v32, v16, v8, v4, v0, v20, v21], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x07,0x88,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00,0x14,0x15,0x00,0x00]
+
+image_sample_d_cl v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21, v48], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_d_cl v[64:66], [v32, v16, v8, v4, v2, v1, v0, v20, v21, v48], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0x8c,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x02,0x01,0x00,0x14,0x15,0x30,0x00,0x00,0x00]
+
+image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_sample_l v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x90,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x00,0x00]
+
+image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_sample_b v[64:66], [v32, v16, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x94,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x00,0x00]
+
+image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_sample_b_cl v[64:66], [v32, v16, v8, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x98,0xf0,0x20,0x40,0x21,0x03,0x10,0x08,0x04,0x00]
+
+image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_sample_lz v[64:66], [v32, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x07,0x9c,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x00,0x00]
+
+image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE
+; GFX10: image_sample_c v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x07,0xa0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00]
+
+image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE
+; GFX10: image_sample_c_cl v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x07,0xa4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_c_d v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_d v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xa8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x08,0x0c,0x10,0x11,0x00,0x00,0x00]
+
+image_sample_c_d_cl v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17, v18], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_d_cl v[64:66], [v32, v16, v0, v2, v1, v4, v8, v12, v16, v17, v18], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xac,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x08,0x0c,0x10,0x11,0x12,0x00,0x00]
+
+image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_l v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xb0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_b v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xb4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_c_b_cl v[64:66], [v32, v16, v0, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_b_cl v[64:66], [v32, v16, v0, v2, v1, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xb8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x05,0x00,0x00,0x00]
+
+image_sample_c_lz v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_lz v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xbc,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00]
+
+image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xc0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00]
+
+image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_cl_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xc4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_d_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_d_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xc8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00,0x00]
+
+image_sample_d_cl_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_d_cl_o v[64:66], [v32, v16, v0, v2, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xcc,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x00,0x00]
+
+image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_l_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xd0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_b_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xd4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xd8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00]
+
+image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xdc,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x00]
+
+image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_o v[64:66], [v32, v16, v0, v2, v1], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x07,0xe0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01]
+
+image_sample_c_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_cl_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xe4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00]
+
+image_sample_c_d_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_d_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xe8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00]
+
+image_sample_c_d_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_d_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xec,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x00]
+
+image_sample_c_l_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_l_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf0,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00]
+
+image_sample_c_b_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_b_o v[64:66], [v32, v16, v0, v2, v1, v4], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf4,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x00,0x00,0x00]
+
+image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x00,0x00]
+
+image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
+; GFX10: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0xfc,0xf0,0x20,0x40,0x21,0x03,0x00,0x10,0x00,0x00]
+
+image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x03]
+
+image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE
+; GFX10: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x02,0x04,0xf1,0x20,0x40,0x21,0x03]
+
+image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX10: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x04,0x10,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]
+
+image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D
+; GFX10: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x08,0x14,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]
+
+image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10: image_gather4_b_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x01,0x18,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_lz v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x1c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]
+
+image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x20,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00]
+
+image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_cl v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x24,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_l v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x30,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_b v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x34,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_c_b_cl v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_b_cl v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x38,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00]
+
+image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_lz v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x3c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00]
+
+image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x40,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00]
+
+image_gather4_cl_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_cl_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x44,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_l_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_l_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x50,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_b_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_b_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x54,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x58,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00]
+
+image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_lz_o v[64:67], [v32, v0, v4, v5], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x5c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x00]
+
+image_gather4_c_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x60,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_gather4_c_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_cl_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x64,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00]
+
+image_gather4_c_l_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_l_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x70,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00]
+
+image_gather4_c_b_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
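+; GFX10: image_gather4_c_b_o v[64:67], [v32, v0, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x74,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x00,0x00,0x00]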
+
+image_gather4_c_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_b_cl_o v[64:67], [v32, v0, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x01,0x78,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06,0x07,0x08,0x00,0x00]
+
+image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX10: image_gather4_c_lz_o v[64:67], [v32, v0, v4, v5, v6], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x01,0x7c,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x05,0x06]
+
+image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX10: image_get_lod v64, v[32:33], s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x01,0x80,0xf1,0x20,0x40,0x21,0x03]
+
+image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10: image_get_lod v[64:65], [v32, v0, v16], s[4:11], s[100:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x03,0x80,0xf1,0x20,0x40,0x21,0x03,0x00,0x10,0x00,0x00]
+
+image_sample_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xa0,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07]
+
+image_sample_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xa4,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x00,0x00,0x00]
+
+image_sample_c_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_cd v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xa8,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x00,0x00,0x00]
+
+image_sample_c_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_cd_cl v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xac,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00]
+
+image_sample_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xb0,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x00,0x00,0x00]
+
+image_sample_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xb4,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00]
+
+image_sample_c_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_cd_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xb8,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x00,0x00]
+
+image_sample_c_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D
+; GFX10: image_sample_c_cd_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5, v6, v7, v8, v9, v10], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x07,0xbc,0xf1,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x00]
Added: llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg_err.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg_err.s?rev=359698&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg_err.s (added)
+++ llvm/trunk/test/MC/AMDGPU/gfx10_asm_mimg_err.s Wed May 1 09:32:58 2019
@@ -0,0 +1,38 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=NOGFX10 %s
+
+; TODO: more helpful error message for missing dim operand
+image_load v[0:3], v0, s[0:7] dmask:0xf unorm
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D da
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+image_load_pck v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image data size does not match dmask and tfe
+
+image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_sample_d v[0:3], [v0, v1, v2, v3, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_sample_c_d v[0:3], [v0, v1, v2, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_sample_c_d_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match dim and a16
+
+image_load v[0:1], v0, s[0:7] dmask:0x9 dim:1 D
+; NOGFX10: :[[@LINE-1]]:{{[0-9]+}}: error: failed parsing operand
Added: llvm/trunk/test/MC/AMDGPU/mtbuf-gfx10.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/mtbuf-gfx10.s?rev=359698&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/mtbuf-gfx10.s (added)
+++ llvm/trunk/test/MC/AMDGPU/mtbuf-gfx10.s Wed May 1 09:32:58 2019
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
+
+// GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb0,0xe8,0x00,0x00,0x20,0x80]
+tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0
+// GFX10: tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb1,0xe8,0x00,0x00,0x20,0x80]
+tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0
+// GFX10: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0 ; encoding: [0x00,0x00,0xb3,0xe8,0x00,0x00,0x20,0x80]
+tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0
+// GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 ; encoding: [0x00,0x00,0x73,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0
+// GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:22, 0 slc ; encoding: [0x00,0x00,0xb3,0xe8,0x00,0x08,0x40,0x80]
+tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:22, 0 slc
+// GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:63, 0 glc ; encoding: [0x00,0x40,0xfb,0xe9,0x00,0x04,0x00,0x80]
+tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:63, 0 glc
+// GFX10: tbuffer_load_format_xyzw v[12:15], off, s[0:3], format:23, 0 glc dlc ; encoding: [0x00,0xc0,0xbb,0xe8,0x00,0x0c,0x00,0x80]
+tbuffer_load_format_xyzw v[12:15], off, s[0:3], format:23, 0 glc dlc
+// GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 offset:42 ; encoding: [0x2a,0x00,0x73,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 offset:42
+// GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:62, s4 offset:73 ; encoding: [0x49,0x00,0xf3,0xe9,0x00,0x04,0x00,0x04]
+tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:62, s4 offset:73
+// GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:47, 61 offset:4095 ; encoding: [0xff,0x0f,0x7b,0xe9,0x00,0x00,0x00,0xbd]
+tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:47, 61 offset:4095
+// GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:77, s4 offset:1 ; encoding: [0x01,0x00,0x6b,0xea,0x00,0x08,0x00,0x04]
+tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:77, s4 offset:1
+// GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 idxen ; encoding: [0x00,0x20,0x73,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 idxen
+// GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen ; encoding: [0x00,0x10,0x73,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen
+// GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen offset:52 ; encoding: [0x34,0x10,0x73,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen offset:52
+// GFX10: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], format:78, 0 idxen offen ; encoding: [0x00,0x30,0x73,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], format:78, 0 idxen offen
+// GFX10: tbuffer_load_format_xy v[0:1], off, s[0:3], format:77, 0 ; encoding: [0x00,0x00,0x69,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_xy v[0:1], off, s[0:3], format:77, 0
+// GFX10: tbuffer_load_format_x v0, off, s[0:3], format:77, 0 ; encoding: [0x00,0x00,0x68,0xea,0x00,0x00,0x00,0x80]
+tbuffer_load_format_x v0, off, s[0:3], format:77, 0
+// GFX10: tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen ; encoding: [0x00,0x20,0x0c,0xe9,0x01,0x00,0x21,0x80]
+tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen
+// GFX10: tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen ; encoding: [0x00,0x20,0x0d,0xe9,0x01,0x00,0x21,0x80]
+tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen
+// GFX10: tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen ; encoding: [0x00,0x20,0x0f,0xe9,0x02,0x00,0x21,0x80]
+tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen
+// GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0 ; encoding: [0x00,0x00,0x67,0xe9,0x00,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0
+// GFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc ; encoding: [0x00,0x40,0xef,0xe9,0x00,0x04,0x00,0x80]
+tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc
+// GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc ; encoding: [0x00,0x00,0x77,0xea,0x00,0x08,0x40,0x80]
+tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc
+// GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 ; encoding: [0x00,0x00,0x77,0xea,0x00,0x08,0x00,0x80]
+tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0
+// GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42 ; encoding: [0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42
+// GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, s4 offset:42 ; encoding: [0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x04]
+tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, s4 offset:42
+// GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen ; encoding: [0x00,0x20,0x7f,0xe9,0x04,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen
+// GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen ; encoding: [0x00,0x10,0x9f,0xeb,0x04,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen
+// GFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen ; encoding: [0x00,0x30,0x37,0xea,0x04,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen
+// GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen ; encoding: [0x00,0x20,0xff,0xe9,0x04,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen
+// GFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen ; encoding: [0x00,0x20,0x77,0xe9,0x06,0x00,0x00,0x80]
+tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen
+// GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen ; encoding: [0x00,0x20,0xec,0xeb,0x01,0x00,0x00,0x80]
+tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen
+// GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen ; encoding: [0x00,0x20,0x0d,0xe9,0x02,0x00,0x00,0x80]
+tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen
Added: llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt?rev=359698&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt (added)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt Wed May 1 09:32:58 2019
@@ -0,0 +1,311 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX10 %s
+
+# GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00
+
+# GFX10: image_load v[0:3], v[1:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x00,0xf0,0x01,0x00,0x00,0x00]
+0x08,0x1f,0x00,0xf0,0x01,0x00,0x00,0x00
+
+# GFX10: image_load v[252:255], v[2:4], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x00,0xf0,0x02,0xfc,0x00,0x00]
+0x10,0x1f,0x00,0xf0,0x02,0xfc,0x00,0x00
+
+# GFX10: image_load v[252:255], v[3:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x00,0xf0,0x03,0xfc,0x00,0x00]
+0x18,0x1f,0x00,0xf0,0x03,0xfc,0x00,0x00
+
+# GFX10: image_load v[253:255], v[4:5], s[0:7] dmask:0xb dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1b,0x00,0xf0,0x04,0xfd,0x00,0x00]
+0x20,0x1b,0x00,0xf0,0x04,0xfd,0x00,0x00
+
+# GFX10: image_load v[254:255], v[5:7], s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x19,0x00,0xf0,0x05,0xfe,0x00,0x00]
+0x28,0x19,0x00,0xf0,0x05,0xfe,0x00,0x00
+
+# GFX10: image_load v255, v[6:8], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x18,0x00,0xf0,0x06,0xff,0x00,0x00]
+0x30,0x18,0x00,0xf0,0x06,0xff,0x00,0x00
+
+# GFX10: image_load v65, v[7:10], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x18,0x00,0xf0,0x07,0x41,0x00,0x00]
+0x38,0x18,0x00,0xf0,0x07,0x41,0x00,0x00
+
+# GFX10: image_load_mip v[16:19], v[8:9], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x04,0xf0,0x08,0x10,0x01,0x00]
+0x00,0x1f,0x04,0xf0,0x08,0x10,0x01,0x00
+
+# GFX10: image_load_pck v[16:19], v[8:9], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x08,0xf0,0x08,0x10,0x01,0x00]
+0x08,0x1f,0x08,0xf0,0x08,0x10,0x01,0x00
+
+# GFX10: image_load_pck_sgn v[16:19], v[8:9], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x08,0x3f,0x0c,0xf0,0x08,0x10,0x01,0x00]
+0x08,0x3f,0x0c,0xf0,0x08,0x10,0x01,0x00
+
+# GFX10: image_load_mip_pck v[16:19], v[8:10], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm slc ; encoding: [0x08,0x1f,0x10,0xf2,0x08,0x10,0x01,0x00]
+0x08,0x1f,0x10,0xf2,0x08,0x10,0x01,0x00
+
+# GFX10: image_load_mip_pck_sgn v[16:19], v[8:10], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm dlc ; encoding: [0x88,0x1f,0x14,0xf0,0x08,0x10,0x01,0x00]
+0x88,0x1f,0x14,0xf0,0x08,0x10,0x01,0x00
+
+# TODO: This is incorrect: r128 should use a 128-bit register for srsrc
+# GFX10: image_load_mip_pck_sgn v[16:19], v[8:10], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D unorm r128 ; encoding: [0x08,0x9f,0x14,0xf0,0x08,0x10,0x01,0x00]
+0x08,0x9f,0x14,0xf0,0x08,0x10,0x01,0x00
+
+# GFX10: image_store v16, v[8:9], s[96:103] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x14,0x20,0xf0,0x08,0x10,0x18,0x00]
+0x08,0x14,0x20,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_store v[16:17], v[8:10], s[96:103] dmask:0x5 dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x15,0x20,0xf0,0x08,0x10,0x18,0x00]
+0x10,0x15,0x20,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_store v[16:18], v[8:10], s[96:103] dmask:0xd dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1d,0x20,0xf0,0x08,0x10,0x18,0x00]
+0x18,0x1d,0x20,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_store v[16:19], v[8:10], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x20,0xf0,0x08,0x10,0x18,0x00]
+0x28,0x1f,0x20,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_store_mip v[16:19], v[8:10], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x24,0xf0,0x08,0x10,0x18,0x00]
+0x20,0x1f,0x24,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_store_pck v[16:19], v[8:10], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x28,0xf0,0x08,0x10,0x18,0x00]
+0x10,0x1f,0x28,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_store_mip_pck v[16:19], v[8:11], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x2c,0xf0,0x08,0x10,0x18,0x00]
+0x10,0x1f,0x2c,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_get_resinfo v[16:19], v8, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00]
+0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_atomic_swap v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00
+
+# NOTE: Contents of unused NSA bytes are NOT preserved.
+
+# GFX10: image_atomic_cmpswap v[16:17], [v8, v9], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]
+0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_add v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; encoding: [0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
+0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_sub v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; encoding: [0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
+0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_smin v16, [v8, v9], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; encoding: [0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]
+0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_umin v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc ; encoding: [0x2a,0x31,0x54,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
+0x2a,0x31,0x54,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_smax v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc ; encoding: [0x32,0x31,0x58,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
+0x32,0x31,0x58,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_umax v16, [v8, v9, v10, v11], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc ; encoding: [0x3a,0x31,0x5c,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x00]
+0x3a,0x31,0x5c,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_atomic_and v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x60,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x31,0x60,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_atomic_or v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x64,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x31,0x64,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_atomic_xor v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x68,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x31,0x68,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_atomic_inc v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x6c,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x31,0x6c,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_atomic_dec v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x31,0x70,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x31,0x70,0xf0,0x08,0x10,0x18,0x00
+
+# TODO: image_atomic_fcmpswap
+# TODO: image_atomic_fmin
+# TODO: image_atomic_fmax
+
+# GFX10: image_sample v[16:19], v8, s[96:103], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x80,0xf0,0x08,0x10,0x18,0x00]
+0x00,0x0f,0x80,0xf0,0x08,0x10,0x18,0x00
+
+# GFX10: image_sample_cl v[16:17], [v8, v9, v10], s[96:103], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_2D d16 ; encoding: [0x0a,0x0f,0x84,0xf0,0x08,0x10,0x18,0x80,0x09,0x0a,0x00,0x00]
+0x0a,0x0f,0x84,0xf0,0x08,0x10,0x18,0x80,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_d v[16:19], [v8, v9, v10], s[96:103], s[0:3] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x88,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
+0x02,0x0f,0x88,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_d_cl v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[4:7] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x0f,0x8c,0xf0,0x08,0x10,0x25,0x00,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00]
+0x0c,0x0f,0x8c,0xf0,0x08,0x10,0x25,0x00,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_l v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x90,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x12,0x0f,0x90,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_b v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x94,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x1a,0x0f,0x94,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_b_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x98,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x22,0x0f,0x98,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_lz v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x9c,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x2a,0x0f,0x9c,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xa0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x0a,0x0f,0xa0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0xa4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x12,0x0f,0xa4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_d v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xa8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x00,0x00,0x00]
+0x16,0x0f,0xa8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14
+
+# GFX10: image_sample_c_d_cl v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xac,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10]
+0x1c,0x0f,0xac,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_c_l v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0xb0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x1a,0x0f,0xb0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_b v[16:19], v[8:15], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03]
+0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03
+
+# TODO: address of this instruction is v[250:255], but this register class does
+# not exist, and the next-larger size goes beyond the last register, so
+# the disassembly is not adjusted properly
+# GFX10: image_sample_c_b_cl v16, v[250:252], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03]
+0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03
+
+# GFX10: image_sample_c_lz v[16:19], v[253:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03]
+0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03
+
+# GFX10: image_sample_o v[16:19], v[252:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x0f,0xc0,0xf0,0xfc,0x10,0x25,0x03]
+0x28,0x0f,0xc0,0xf0,0xfc,0x10,0x25,0x03
+
+# GFX10: image_sample_cl_o v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xc4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x02,0x0f,0xc4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_d_o v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x0f,0xc8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00]
+0x0c,0x0f,0xc8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_d_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xcc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x00,0x00]
+0x16,0x0f,0xcc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14
+
+# GFX10: image_sample_l_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0xd0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x1a,0x0f,0xd0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_b_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0xd4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x22,0x0f,0xd4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_b_cl_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0xd8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00]
+0x2c,0x0f,0xd8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_lz_o v[16:19], [v8, v9], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xdc,0xf0,0x08,0x10,0x25,0x03,0x09,0x00,0x00,0x00]
+0x02,0x0f,0xdc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xe0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x0a,0x0f,0xe0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_cl_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x0f,0xe4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00]
+0x14,0x0f,0xe4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_c_d_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xe8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10]
+0x1c,0x0f,0xe8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_c_d_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xec,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x00]
+0x16,0x0f,0xec,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14
+
+# GFX10: image_sample_c_l_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0xf0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x22,0x0f,0xf0,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_b_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0xf4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00]
+0x2c,0x0f,0xf4,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_c_b_cl_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xf8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x02,0x0f,0xf8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_lz_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_l v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_b v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_b_cl v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x18,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x02,0x0f,0x18,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_lz v[16:19], [v8, v9], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x1c,0xf1,0x08,0x10,0x25,0x03,0x09,0x00,0x00,0x00]
+0x0a,0x0f,0x1c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x20,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x12,0x0f,0x20,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x24,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x1a,0x0f,0x24,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_l v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x30,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x22,0x0f,0x30,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_b v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x34,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x2a,0x0f,0x34,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_b_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x38,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x02,0x0f,0x38,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_lz v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x3c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x0a,0x0f,0x3c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x40,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x12,0x0f,0x40,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_cl_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x44,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x1a,0x0f,0x44,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_l_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x50,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x22,0x0f,0x50,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_b_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x54,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x2a,0x0f,0x54,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_b_cl_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x58,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x02,0x0f,0x58,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_lz_o v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x5c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x0a,0x0f,0x5c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x60,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x12,0x0f,0x60,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_cl_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0x64,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00]
+0x1c,0x0f,0x64,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_gather4_c_l_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x70,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x22,0x0f,0x70,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_b_o v[16:19], [v8, v9, v10, v11, v12, v13], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0x74,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x00,0x00,0x00]
+0x2c,0x0f,0x74,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_gather4_c_b_cl_o v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0x78,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x02,0x0f,0x78,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_gather4_c_lz_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0x7c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
+0x0a,0x0f,0x7c,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_get_lod v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x80,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
+0x12,0x0f,0x80,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_cd v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xa0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00]
+0x1c,0x0f,0xa0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_cd_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0xa4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x22,0x0f,0xa4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_c_cd v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2c,0x0f,0xa8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x00]
+0x2c,0x0f,0xa8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_c_cd_cl v[16:19], [v8, v9, v10, v11, v12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x0f,0xac,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c]
+0x02,0x0f,0xac,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
+
+# GFX10: image_sample_cd_o v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0c,0x0f,0xb0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00]
+0x0c,0x0f,0xb0,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_cd_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x16,0x0f,0xb4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x00,0x00]
+0x16,0x0f,0xb4,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0x14
+
+# GFX10: image_sample_c_cd_o v[16:19], [v8, v9, v10, v11, v12, v13, v14, v15, v16], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1c,0x0f,0xb8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10]
+0x1c,0x0f,0xb8,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+
+# GFX10: image_sample_c_cd_cl_o v[16:19], [v8, v9, v10, v11, v12, v13, v14], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x24,0x0f,0xbc,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x00,0x00]
+0x24,0x0f,0xbc,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
Added: llvm/trunk/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt?rev=359698&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt (added)
+++ llvm/trunk/test/MC/Disassembler/AMDGPU/mtbuf_gfx10.txt Wed May 1 09:32:58 2019
@@ -0,0 +1,69 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX10
+
+# GFX10: tbuffer_load_format_d16_x v0, off, s[0:3], format:22, 0
+0x00,0x00,0xb0,0xe8,0x00,0x00,0x20,0x80
+# GFX10: tbuffer_load_format_d16_xy v0, off, s[0:3], format:22, 0
+0x00,0x00,0xb1,0xe8,0x00,0x00,0x20,0x80
+# GFX10: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], format:22, 0
+0x00,0x00,0xb3,0xe8,0x00,0x00,0x20,0x80
+# GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0
+0x00,0x00,0x73,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:22, 0 slc
+0x00,0x00,0xb3,0xe8,0x00,0x08,0x40,0x80
+# GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:63, 0 glc
+0x00,0x40,0xfb,0xe9,0x00,0x04,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[12:15], off, s[0:3], format:23, 0 glc dlc
+0x00,0xc0,0xbb,0xe8,0x00,0x0c,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:78, 0 offset:42
+0x2a,0x00,0x73,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[4:7], off, s[0:3], format:62, s4 offset:73
+0x49,0x00,0xf3,0xe9,0x00,0x04,0x00,0x04
+# GFX10: tbuffer_load_format_xyzw v[0:3], off, s[0:3], format:47, 61 offset:4095
+0xff,0x0f,0x7b,0xe9,0x00,0x00,0x00,0xbd
+# GFX10: tbuffer_load_format_xyzw v[8:11], off, s[0:3], format:77, s4 offset:1
+0x01,0x00,0x6b,0xea,0x00,0x08,0x00,0x04
+# GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 idxen
+0x00,0x20,0x73,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen
+0x00,0x10,0x73,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], format:78, 0 offen offset:52
+0x34,0x10,0x73,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], format:78, 0 idxen offen
+0x00,0x30,0x73,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_xy v[0:1], off, s[0:3], format:77, 0
+0x00,0x00,0x69,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_load_format_x v0, off, s[0:3], format:77, 0
+0x00,0x00,0x68,0xea,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_d16_x v0, v1, s[4:7], format:33, 0 idxen
+0x00,0x20,0x0c,0xe9,0x01,0x00,0x21,0x80
+# GFX10: tbuffer_store_format_d16_xy v0, v1, s[4:7], format:33, 0 idxen
+0x00,0x20,0x0d,0xe9,0x01,0x00,0x21,0x80
+# GFX10: tbuffer_store_format_d16_xyzw v[0:1], v2, s[4:7], format:33, 0 idxen
+0x00,0x20,0x0f,0xe9,0x02,0x00,0x21,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:44, 0
+0x00,0x00,0x67,0xe9,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[4:7], off, s[0:3], format:61, 0 glc
+0x00,0x40,0xef,0xe9,0x00,0x04,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0 slc
+0x00,0x00,0x77,0xea,0x00,0x08,0x40,0x80
+# GFX10: tbuffer_store_format_xyzw v[8:11], off, s[0:3], format:78, 0
+0x00,0x00,0x77,0xea,0x00,0x08,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, 0 offset:42
+0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], off, s[0:3], format:117, s4 offset:42
+0x2a,0x00,0xaf,0xeb,0x00,0x00,0x00,0x04
+# GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:47, 0 idxen
+0x00,0x20,0x7f,0xe9,0x04,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:115, 0 offen
+0x00,0x10,0x9f,0xeb,0x04,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], v[4:5], s[0:3], format:70, 0 idxen offen
+0x00,0x30,0x37,0xea,0x04,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], v4, s[0:3], format:63, 0 idxen
+0x00,0x20,0xff,0xe9,0x04,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xyzw v[0:3], v6, s[0:3], format:46, 0 idxen
+0x00,0x20,0x77,0xe9,0x06,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_x v0, v1, s[0:3], format:125, 0 idxen
+0x00,0x20,0xec,0xeb,0x01,0x00,0x00,0x80
+# GFX10: tbuffer_store_format_xy v[0:1], v2, s[0:3], format:33, 0 idxen
+0x00,0x20,0x0d,0xe9,0x02,0x00,0x00,0x80
+
More information about the llvm-commits
mailing list