[llvm] r340831 - [AMDGPU] Add support for a16 modifier for gfx9
Ryan Taylor via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 28 08:07:31 PDT 2018
Author: rtayl
Date: Tue Aug 28 08:07:30 2018
New Revision: 340831
URL: http://llvm.org/viewvc/llvm-project?rev=340831&view=rev
Log:
[AMDGPU] Add support for a16 modifier for gfx9
Summary:
Adding support for the a16 modifier on gfx9. The a16 bit replaces the r128 bit on gfx9.
Change-Id: Ie8b881e4e6d2f023fb5e0150420893513e5f4841
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D50575
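For readers skimming the patch: with a16 set, the 16-bit coordinates (plus lod/clamp/mip values) are packed two per 32-bit address register instead of one per register, so the address footprint roughly halves. Below is a minimal standalone C++ sketch of that arithmetic; the helper name is made up for illustration only, and the real packing lives in the SIISelLowering.cpp hunk further down.

#include <cstdio>

// Two 16-bit address values share one 32-bit dword; an odd trailing value
// is padded (with undef in the real lowering).
static unsigned packedAddrDwords(unsigned Num16BitValues) {
  return (Num16BitValues + 1) / 2;
}

int main() {
  // 2D gather/sample: s, t -> one address dword (a single v0 in the tests).
  std::printf("2D: %u dword(s)\n", packedAddrDwords(2));
  // Cube: s, t, face -> two dwords (v[0:1] in the tests).
  std::printf("cube: %u dword(s)\n", packedAddrDwords(3));
  return 0;
}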
Added:
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/test/MC/AMDGPU/mimg.s
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Tue Aug 28 08:07:30 2018
@@ -242,6 +242,12 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
+def FeatureR128A16 : SubtargetFeature<"r128-a16",
+ "HasR128A16",
+ "true",
+ "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -444,7 +450,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGen
FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
- FeatureAddNoCarryInsts, FeatureScalarAtomics
+ FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
]
>;
@@ -703,6 +709,9 @@ def HasSDWA9 : Predicate<"Subtarget->has
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureDPP">;
+def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
+ AssemblerPredicate<"FeatureR128A16">;
+
def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
AssemblerPredicate<"FeatureIntClamp">;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue Aug 28 08:07:30 2018
@@ -197,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple
HasSDWAMac(false),
HasSDWAOutModsVOPC(false),
HasDPP(false),
+ HasR128A16(false),
HasDLInsts(false),
D16PreservesUnusedBits(false),
FlatAddressSpace(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Tue Aug 28 08:07:30 2018
@@ -358,6 +358,7 @@ protected:
bool HasSDWAMac;
bool HasSDWAOutModsVOPC;
bool HasDPP;
+ bool HasR128A16;
bool HasDLInsts;
bool D16PreservesUnusedBits;
bool FlatAddressSpace;
@@ -791,6 +792,10 @@ public:
return HasDPP;
}
+ bool hasR128A16() const {
+ return HasR128A16;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Aug 28 08:07:30 2018
@@ -156,7 +156,7 @@ public:
ImmTyDMask,
ImmTyUNorm,
ImmTyDA,
- ImmTyR128,
+ ImmTyR128A16,
ImmTyLWE,
ImmTyExpTgt,
ImmTyExpCompr,
@@ -290,7 +290,7 @@ public:
bool isDMask() const { return isImmTy(ImmTyDMask); }
bool isUNorm() const { return isImmTy(ImmTyUNorm); }
bool isDA() const { return isImmTy(ImmTyDA); }
- bool isR128() const { return isImmTy(ImmTyR128); }
+ bool isR128A16() const { return isImmTy(ImmTyR128A16); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
bool isOff() const { return isImmTy(ImmTyOff); }
bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -678,7 +678,7 @@ public:
case ImmTyDMask: OS << "DMask"; break;
case ImmTyUNorm: OS << "UNorm"; break;
case ImmTyDA: OS << "DA"; break;
- case ImmTyR128: OS << "R128"; break;
+ case ImmTyR128A16: OS << "R128A16"; break;
case ImmTyLWE: OS << "LWE"; break;
case ImmTyOff: OS << "Off"; break;
case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1090,7 +1090,6 @@ private:
bool validateMIMGAtomicDMask(const MCInst &Inst);
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
- bool validateMIMGR128(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
@@ -2445,22 +2444,6 @@ bool AMDGPUAsmParser::validateMIMGGather
return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
-bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
-
- const unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &Desc = MII.get(Opc);
-
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
- return true;
-
- int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
- assert(Idx != -1);
-
- bool R128 = (Inst.getOperand(Idx).getImm() != 0);
-
- return !R128 || hasMIMG_R128();
-}
-
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
@@ -2495,11 +2478,6 @@ bool AMDGPUAsmParser::validateInstructio
"integer clamping is not supported on this GPU");
return false;
}
- if (!validateMIMGR128(Inst)) {
- Error(IDLoc,
- "r128 modifier is not supported on this GPU");
- return false;
- }
// For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
Error(IDLoc,
@@ -3463,6 +3441,10 @@ AMDGPUAsmParser::parseNamedBit(const cha
case AsmToken::Identifier: {
StringRef Tok = Parser.getTok().getString();
if (Tok == Name) {
+ if (Tok == "r128" && isGFX9())
+ Error(S, "r128 modifier is not supported on this GPU");
+ if (Tok == "a16" && !isGFX9())
+ Error(S, "a16 modifier is not supported on this GPU");
Bit = 1;
Parser.Lex();
} else if (Tok.startswith("no") && Tok.endswith(Name)) {
@@ -4705,7 +4687,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
@@ -4815,7 +4797,8 @@ static const OptionalOperand AMDGPUOptio
{"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
{"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
{"da", AMDGPUOperand::ImmTyDA, true, nullptr},
- {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
+ {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
+ {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
{"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
{"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Tue Aug 28 08:07:30 2018
@@ -207,9 +207,12 @@ void AMDGPUInstPrinter::printDA(const MC
printNamedBit(MI, OpNo, O, "da");
}
-void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "r128");
+ if (STI.hasFeature(AMDGPU::FeatureR128A16))
+ printNamedBit(MI, OpNo, O, "a16");
+ else
+ printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Tue Aug 28 08:07:30 2018
@@ -80,7 +80,7 @@ private:
raw_ostream &O);
void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printLWE(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
Modified: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td Tue Aug 28 08:07:30 2018
@@ -141,7 +141,7 @@ class MIMG_NoSampler_Helper <bits<7> op,
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -199,7 +199,7 @@ class MIMG_Store_Helper <bits<7> op, str
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
@@ -252,7 +252,7 @@ class MIMG_Atomic_Helper <string asm, Re
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
}
@@ -316,7 +316,7 @@ class MIMG_Sampler_Helper <bits<7> op, s
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
- R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Aug 28 08:07:30 2018
@@ -4576,6 +4576,8 @@ SDValue SITargetLowering::lowerImage(SDV
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG) const {
SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -4585,6 +4587,7 @@ SDValue SITargetLowering::lowerImage(SDV
SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
bool IsD16 = false;
+ bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
unsigned AddrIdx; // Index of first address argument
@@ -4660,23 +4663,59 @@ SDValue SITargetLowering::lowerImage(SDV
}
}
- unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
- (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
- (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
- (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
+ unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
+ unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
+ unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
+ NumCoords + NumLCM;
+ unsigned NumMIVAddrs = NumVAddrs;
+
SmallVector<SDValue, 4> VAddrs;
- for (unsigned i = 0; i < NumVAddrs; ++i)
- VAddrs.push_back(Op.getOperand(AddrIdx + i));
// Optimize _L to _LZ when _L is zero
if (LZMappingInfo) {
if (auto ConstantLod =
- dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
+ dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
- VAddrs.pop_back(); // remove 'lod'
+ NumMIVAddrs--; // remove 'lod'
+ }
+ }
+ }
+
+ // Check for 16 bit addresses and pack if true.
+ unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
+ MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
+ if (VAddrVT.getScalarType() == MVT::f16 &&
+ ST->hasFeature(AMDGPU::FeatureR128A16)) {
+ IsA16 = true;
+ for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
+ SDValue AddrLo, AddrHi;
+ // Push back extra arguments.
+ if (i < DimIdx) {
+ AddrLo = Op.getOperand(i);
+ } else {
+ AddrLo = Op.getOperand(i);
+ // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
+ // in 1D, derivatives dx/dh and dx/dv are packed with undef.
+ if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
+ ((NumGradients / 2) % 2 == 1 &&
+ (i == DimIdx + (NumGradients / 2) - 1 ||
+ i == DimIdx + NumGradients - 1))) {
+ AddrHi = DAG.getUNDEF(MVT::f16);
+ } else {
+ AddrHi = Op.getOperand(i + 1);
+ i++;
+ }
+ AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16,
+ {AddrLo, AddrHi});
+ AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
}
+ VAddrs.push_back(AddrLo);
}
+ } else {
+ for (unsigned i = 0; i < NumMIVAddrs; ++i)
+ VAddrs.push_back(Op.getOperand(AddrIdx + i));
}
SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
@@ -4725,7 +4764,8 @@ SDValue SITargetLowering::lowerImage(SDV
Ops.push_back(Unorm);
Ops.push_back(GLC);
Ops.push_back(SLC);
- Ops.push_back(False); // r128
+ Ops.push_back(IsA16 && // a16 or r128
+ ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
Ops.push_back(False); // tfe
Ops.push_back(False); // lwe
Ops.push_back(DimInfo->DA ? True : False);
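For reference, the packing loop added to lowerImage above can be sketched outside of SelectionDAG as follows. The names (packA16Addresses, packPair) and the use of 0 as a stand-in for the UNDEF high half are assumptions made for illustration; the real code builds v2f16 SCALAR_TO_VECTOR nodes and bitcasts them to i32. The padding rule mirrors the comment in the diff: the last odd value and, for odd-sized gradient groups, dz/dh and dz/dv (or the 1D dx/dh and dx/dv) are paired with an undefined high half.

#include <cstdint>
#include <cstdio>
#include <vector>

// One packed 32-bit address dword: low and high 16-bit lanes.
static uint32_t packPair(uint16_t Lo, uint16_t Hi) {
  return uint32_t(Lo) | (uint32_t(Hi) << 16);
}

// Extra holds the 32-bit extra args (offset/bias/zcompare), which are never
// packed; Addr16 holds NumGrad 16-bit gradients followed by the 16-bit
// coords/lod/clamp/mip values.
static std::vector<uint32_t>
packA16Addresses(const std::vector<uint32_t> &Extra,
                 const std::vector<uint16_t> &Addr16, unsigned NumGrad) {
  std::vector<uint32_t> Out(Extra);
  const unsigned N = static_cast<unsigned>(Addr16.size());
  for (unsigned I = 0; I < N; ++I) {
    uint16_t Lo = Addr16[I], Hi = 0; // 0 stands in for UNDEF padding
    // Pad instead of pairing when this is the last value, or when it closes
    // an odd-sized derivative group.
    bool OddGradGroup = (NumGrad / 2) % 2 == 1 &&
                        (I == NumGrad / 2 - 1 || I == NumGrad - 1);
    if (I + 1 < N && !OddGradGroup) {
      Hi = Addr16[I + 1];
      ++I;
    }
    Out.push_back(packPair(Lo, Hi));
  }
  return Out;
}

int main() {
  // image_sample_d in 2D: gradients dsdh, dtdh, dsdv, dtdv, then coords s, t.
  std::vector<uint16_t> A = {1, 2, 3, 4, 5, 6};
  for (uint32_t D : packA16Addresses({}, A, /*NumGrad=*/4))
    std::printf("0x%08x\n", D); // three packed dwords
  return 0;
}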
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Tue Aug 28 08:07:30 2018
@@ -754,7 +754,7 @@ def SLC : NamedOperandBit<"SLC", NamedMa
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
-def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
+def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll?rev=340831&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll Tue Aug 28 08:07:30 2018
@@ -0,0 +1,136 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}gather4_2d:
+; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_cube:
+; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
+define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_2darray:
+; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
+define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_2d:
+; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_cl_2d:
+; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_cl_2d:
+; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_b_2d:
+; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_b_2d:
+; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_b_cl_2d:
+; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
+; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_l_2d:
+; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_l_2d:
+; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_lz_2d:
+; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}gather4_c_lz_2d:
+; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll?rev=340831&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll Tue Aug 28 08:07:30 2018
@@ -0,0 +1,435 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+
+; GCN-LABEL: {{^}}sample_1d:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_2d:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_3d:
+; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cube:
+; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
+define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_1darray:
+; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}
+define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_2darray:
+; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
+define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_1d:
+; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_2d:
+; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cl_1d:
+; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cl_2d:
+; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cl_1d:
+; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cl_2d:
+; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_1d:
+; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_2d:
+; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_1d:
+; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_2d:
+; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_cl_1d:
+; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_b_cl_2d:
+; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_cl_1d:
+; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_b_cl_2d:
+; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_1d:
+; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_2d:
+; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_3d:
+; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_1d:
+; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_2d:
+; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_cl_1d:
+; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_d_cl_2d:
+; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_cl_1d:
+; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_cl_2d:
+; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_1d:
+; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_2d:
+; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_1d:
+; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_2d:
+; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_cl_1d:
+; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_cd_cl_2d:
+; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
+; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
+; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_1d:
+; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_l_2d:
+; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_1d:
+; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_l_2d:
+; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_lz_1d:
+; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_lz_2d:
+; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_lz_1d:
+; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_lz_2d:
+; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
+define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+main_body:
+ %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <4 x float> %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
+; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}
+define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+main_body:
+ %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret float %v
+}
+
+; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
+; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}
+define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+main_body:
+ %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+ ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
Modified: llvm/trunk/test/MC/AMDGPU/mimg.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/mimg.s?rev=340831&r1=340830&r2=340831&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/mimg.s (original)
+++ llvm/trunk/test/MC/AMDGPU/mimg.s Tue Aug 28 08:07:30 2018
@@ -268,6 +268,30 @@ image_sample v[193:194], v[237:240], s[
// GFX9: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]
//===----------------------------------------------------------------------===//
+// Image Sample: a16
+//===----------------------------------------------------------------------===//
+
+image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
+// GFX9: image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x80,0xf0,0xed,0xc1,0x27,0x00]
+// NOSICI: error: a16 modifier is not supported on this GPU
+// NOVI: error: a16 modifier is not supported on this GPU
+
+image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
+// GFX9: image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x88,0xf0,0xed,0xc1,0x27,0x00]
+// NOSICI: error: a16 modifier is not supported on this GPU
+// NOVI: error: a16 modifier is not supported on this GPU
+
+image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
+// GFX9: image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xa8,0xf0,0xed,0xc1,0x27,0x00]
+// NOSICI: error: a16 modifier is not supported on this GPU
+// NOVI: error: a16 modifier is not supported on this GPU
+
+image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
+// GFX9: image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xac,0xf0,0xed,0xc1,0x27,0x00]
+// NOSICI: error: a16 modifier is not supported on this GPU
+// NOVI: error: a16 modifier is not supported on this GPU
+
+//===----------------------------------------------------------------------===//
// Image Atomics
//===----------------------------------------------------------------------===//
@@ -372,3 +396,13 @@ image_gather4 v[5:6], v1, s[8:15], s[12:
// NOGFX8_0: error: image data size does not match dmask and tfe
// NOGFX8_1: error: image data size does not match dmask and tfe
// NOGFX9: error: image data size does not match dmask and tfe
+
+image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16
+// GFX9: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x00,0xf1,0x01,0x05,0x62,0x00]
+// NOSICI: error: a16 modifier is not supported on this GPU
+// NOVI: error: a16 modifier is not supported on this GPU
+
+image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16
+// GFX9: image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x18,0xf1,0x01,0x05,0x62,0x00]
+// NOSICI: error: a16 modifier is not supported on this GPU
+// NOVI: error: a16 modifier is not supported on this GPU