[llvm] 8756869 - [AMDGPU] Add a16 feature to gfx10

Mon Feb 10 00:17:52 PST 2020

Author: Sebastian Neubauer
Date: 2020-02-10T09:04:23+01:00
New Revision: 8756869170e67019151bff0fc7657597f37fced2

URL: https://github.com/llvm/llvm-project/commit/8756869170e67019151bff0fc7657597f37fced2
DIFF: https://github.com/llvm/llvm-project/commit/8756869170e67019151bff0fc7657597f37fced2.diff

LOG: [AMDGPU] Add a16 feature to gfx10

Based on D72931

This adds a new feature called A16 which is enabled for gfx10.
gfx9 keeps the R128A16 feature so it can share all the instruction encodings
with gfx7/8.

Differential Revision: https://reviews.llvm.org/D73956

Added: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
    llvm/lib/Target/AMDGPU/MIMGInstructions.td
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
    llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
    llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 42b477e07b3b..106480311b5d 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -360,7 +360,13 @@ def FeatureDPP8 : SubtargetFeature<"dpp8",
 def FeatureR128A16 : SubtargetFeature<"r128-a16",
   "HasR128A16",
   "true",
-  "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
+  "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128"
+>;
+
+def FeatureGFX10A16 : SubtargetFeature<"a16",
+  "HasGFX10A16",
+  "true",
+  "Support gfx10-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands"
 >;
 
 def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
@@ -682,7 +688,8 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
    FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
    FeatureVOP3Literal, FeatureDPP8,
-   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC
+   FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
+   FeatureGFX10A16
   ]
 >;
 
@@ -1094,6 +1101,9 @@ def HasDPP8 : Predicate<"Subtarget->hasDPP8()">,
 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
   AssemblerPredicate<"FeatureR128A16">;
 
+def HasGFX10A16 : Predicate<"Subtarget->hasGFX10A16()">,
+  AssemblerPredicate<"FeatureGFX10A16">;
+
 def HasDPP16 : Predicate<"Subtarget->hasDPP()">,
   AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureDPP">;
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 75c4fba3e8fd..31c06ce0bfbf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -241,6 +241,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasDPP(false),
     HasDPP8(false),
     HasR128A16(false),
+    HasGFX10A16(false),
     HasNSAEncoding(false),
     HasDLInsts(false),
     HasDot1Insts(false),

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 6f3ca01cec65..13fa9154fb8b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -342,6 +342,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
   bool HasDPP;
   bool HasDPP8;
   bool HasR128A16;
+  bool HasGFX10A16;
   bool HasNSAEncoding;
   bool HasDLInsts;
   bool HasDot1Insts;
@@ -992,6 +993,10 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
     return HasR128A16;
   }
 
+  bool hasGFX10A16() const {
+    return HasGFX10A16;
+  }
+
   bool hasOffset3fBug() const {
     return HasOffset3fBug;
   }

diff  --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ea70307df491..8c1f63e4ca86 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -163,6 +163,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     ImmTyUNorm,
     ImmTyDA,
     ImmTyR128A16,
+    ImmTyA16,
     ImmTyLWE,
     ImmTyExpTgt,
     ImmTyExpCompr,
@@ -315,6 +316,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
   bool isDA() const { return isImmTy(ImmTyDA); }
   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
+  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
   bool isLWE() const { return isImmTy(ImmTyLWE); }
   bool isOff() const { return isImmTy(ImmTyOff); }
   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
@@ -847,6 +849,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
     case ImmTyUNorm: OS << "UNorm"; break;
     case ImmTyDA: OS << "DA"; break;
     case ImmTyR128A16: OS << "R128A16"; break;
+    case ImmTyA16: OS << "A16"; break;
     case ImmTyLWE: OS << "LWE"; break;
     case ImmTyOff: OS << "Off"; break;
     case ImmTyExpTgt: OS << "ExpTgt"; break;
@@ -1157,6 +1160,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
     return AMDGPU::hasPackedD16(getSTI());
   }
 
+  bool hasGFX10A16() const {
+    return AMDGPU::hasGFX10A16(getSTI());
+  }
+
   bool isSI() const {
     return AMDGPU::isSI(getSTI());
   }
@@ -4650,9 +4657,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
       case AsmToken::Identifier: {
         StringRef Tok = Parser.getTok().getString();
         if (Tok == Name) {
-          if (Tok == "r128" && isGFX9())
+          if (Tok == "r128" && !hasMIMG_R128())
             Error(S, "r128 modifier is not supported on this GPU");
-          if (Tok == "a16" && !isGFX9() && !isGFX10())
+          if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
             Error(S, "a16 modifier is not supported on this GPU");
           Bit = 1;
           Parser.Lex();
@@ -4672,6 +4679,9 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
     return MatchOperand_ParseFail;
 
+  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
+    ImmTy = AMDGPUOperand::ImmTyR128A16;
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -5987,6 +5997,8 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   if (!IsGFX10)
@@ -6096,7 +6108,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
-  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
+  {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index f65dc25d7eec..55146b512aa5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -244,6 +244,11 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
     printNamedBit(MI, OpNo, O, "r128");
 }
 
+void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
+                                  const MCSubtargetInfo &STI, raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "a16");
+}
+
 void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
   printNamedBit(MI, OpNo, O, "lwe");

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index ba53003e9041..83f99f1f4a35 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -86,6 +86,8 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                raw_ostream &O);
   void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                  raw_ostream &O);
+  void printGFX10A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo,
                 const MCSubtargetInfo &STI, raw_ostream &O);
   void printD16(const MCInst *MI, unsigned OpNo,

diff  --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 4006a6205fb8..8a439425062e 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -238,9 +238,9 @@ class MIMG_NoSampler_gfx10<int op, string opcode,
   : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
   let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
@@ -251,9 +251,9 @@ class MIMG_NoSampler_nsa_gfx10<int op, string opcode,
   let InOperandList = !con(AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
@@ -331,9 +331,9 @@ class MIMG_Store_gfx10<int op, string opcode,
   : MIMG_gfx10<op, (outs), dns> {
   let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
                                 DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                                GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
@@ -345,9 +345,9 @@ class MIMG_Store_nsa_gfx10<int op, string opcode,
                            AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe"
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
@@ -436,8 +436,8 @@ class MIMG_Atomic_gfx10<mimg op, string opcode,
 
   let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
                            DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                           GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe);
-  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+                           GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
+  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
 }
 
 class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
@@ -452,8 +452,8 @@ class MIMG_Atomic_nsa_gfx10<mimg op, string opcode,
                            AddrIns,
                            (ins SReg_256:$srsrc, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe));
-  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$tfe$lwe";
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
+  let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
 }
 
 multiclass MIMG_Atomic_Addr_Helper_m <mimg op, string asm,
@@ -522,10 +522,10 @@ class MIMG_Sampler_gfx10<int op, string opcode,
   : MIMG_gfx10<op, (outs DataRC:$vdata), dns> {
   let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
                                 DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
-                                GLC:$glc, SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
   let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$dlc$glc$slc$r128$tfe$lwe"
+                    #"$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 
@@ -536,10 +536,10 @@ class MIMG_Sampler_nsa_gfx10<int op, string opcode,
   let InOperandList = !con(AddrIns,
                            (ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
                                 Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
-                                SLC:$slc, R128A16:$r128, TFE:$tfe, LWE:$lwe),
+                                SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
                            !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
   let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
-                    #"$dlc$glc$slc$r128$tfe$lwe"
+                    #"$dlc$glc$slc$r128$a16$tfe$lwe"
                     #!if(BaseOpcode.HasD16, "$d16", "");
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 475a2b8f30da..e60ad7545425 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5401,7 +5401,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   const MVT VAddrScalarVT = VAddrVT.getScalarType();
   if (((VAddrScalarVT == MVT::f16) || (VAddrScalarVT == MVT::i16))) {
     // Illegal to use a16 images
-    if (!ST->hasFeature(AMDGPU::FeatureR128A16))
+    if (!ST->hasFeature(AMDGPU::FeatureR128A16) && !ST->hasFeature(AMDGPU::FeatureGFX10A16))
       return Op;
 
     IsA16 = true;
@@ -5546,10 +5546,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     Ops.push_back(DLC);
   Ops.push_back(GLC);
   Ops.push_back(SLC);
-  Ops.push_back(IsA16 &&  // a16 or r128
+  Ops.push_back(IsA16 &&  // r128, a16 for gfx9
                 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
-  Ops.push_back(TFE); // tfe
-  Ops.push_back(LWE); // lwe
+  if (IsGFX10)
+    Ops.push_back(IsA16 ? True : False);
+  Ops.push_back(TFE);
+  Ops.push_back(LWE);
   if (!IsGFX10)
     Ops.push_back(DimInfo->DA ? True : False);
   if (BaseOpcode->HasD16)

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 4dcbe92861f2..a4d11780118f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -303,7 +303,7 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
   bits<3> dim;
   bits<2> nsa;
   bits<1> dlc;
-  bits<1> a16 = 0; // TODO: this should be an operand
+  bits<1> a16;
 
   let Inst{0} = op{7};
   let Inst{2-1} = nsa;

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a9800bba8d86..73690aec16b2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3742,11 +3742,34 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
         return false;
       }
 
+      bool IsA16 = false;
+      if (ST.hasR128A16()) {
+        const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128);
+        IsA16 = R128A16->getImm() != 0;
+      } else if (ST.hasGFX10A16()) {
+        const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16);
+        IsA16 = A16->getImm() != 0;
+      }
+
+      bool PackDerivatives = IsA16; // Either A16 or G16
       bool IsNSA = SRsrcIdx - VAddr0Idx > 1;
-      unsigned AddrWords = BaseOpcode->NumExtraArgs +
-                           (BaseOpcode->Gradients ? Dim->NumGradients : 0) +
-                           (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
-                           (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+
+      unsigned AddrWords = BaseOpcode->NumExtraArgs;
+      unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
+                                (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+      if (IsA16)
+        AddrWords += (AddrComponents + 1) / 2;
+      else
+        AddrWords += AddrComponents;
+
+      if (BaseOpcode->Gradients) {
+        if (PackDerivatives)
+          // There are two gradients per coordinate, we pack them separately.
+          // For the 3d case, we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
+          AddrWords += (Dim->NumGradients / 2 + 1) / 2 * 2;
+        else
+          AddrWords += Dim->NumGradients;
+      }
 
       unsigned VAddrWords;
       if (IsNSA) {
@@ -3758,11 +3781,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
           AddrWords = 16;
         else if (AddrWords > 4)
           AddrWords = 8;
-        else if (AddrWords == 3 && VAddrWords == 4) {
-          // CodeGen uses the V4 variant of instructions for three addresses,
-          // because the selection DAG does not support non-power-of-two types.
+        else if (AddrWords == 4)
           AddrWords = 4;
-        }
+        else if (AddrWords == 3)
+          AddrWords = 3;
       }
 
       if (VAddrWords != AddrWords) {

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index bf6f02965475..559ede882fc5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1090,6 +1090,7 @@ def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
+def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;

diff  --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 806f8af9d511..dd94a29a9e50 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -686,7 +686,8 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
   // Check other optional immediate operands for equality.
   unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
                                 AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
-                                AMDGPU::OpName::da,  AMDGPU::OpName::r128};
+                                AMDGPU::OpName::da,  AMDGPU::OpName::r128,
+                                AMDGPU::OpName::a16};
 
   for (auto op : OperandsToMatch) {
     int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 11886715dac1..a5ed44198992 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -927,7 +927,11 @@ bool hasSRAMECC(const MCSubtargetInfo &STI) {
 }
 
 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
-  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
+  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+}
+
+bool hasGFX10A16(const MCSubtargetInfo &STI) {
+  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
 }
 
 bool hasPackedD16(const MCSubtargetInfo &STI) {

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index da2f4dc4877d..4cfb2adee96e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -551,6 +551,7 @@ inline bool isKernel(CallingConv::ID CC) {
 bool hasXNACK(const MCSubtargetInfo &STI);
 bool hasSRAMECC(const MCSubtargetInfo &STI);
 bool hasMIMG_R128(const MCSubtargetInfo &STI);
+bool hasGFX10A16(const MCSubtargetInfo &STI);
 bool hasPackedD16(const MCSubtargetInfo &STI);
 
 bool isSI(const MCSubtargetInfo &STI);

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
index 96f0210825c6..c92f5ff89e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
@@ -1,17 +1,39 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
 
-; GCN-LABEL: {{^}}load_1d:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2d:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %t = extractelement <2 x i16> %coords, i32 1
@@ -19,9 +41,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_3d:
-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -30,9 +62,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_cube:
-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -41,9 +83,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1darray:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %slice = extractelement <2 x i16> %coords, i32 1
@@ -51,9 +103,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2darray:
-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -62,9 +124,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2dmsaa:
-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_2dmsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -73,9 +145,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2darraymsaa:
-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_2darraymsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -85,9 +167,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_1d:
-; GCN: image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_mip_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %mip = extractelement <2 x i16> %coords, i32 1
@@ -95,9 +187,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_2d:
-; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -106,9 +208,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_3d:
-; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -118,9 +230,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_cube:
-; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -130,9 +252,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_1darray:
-; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %slice = extractelement <2 x i16> %coords_lo, i32 1
@@ -141,9 +273,19 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_2darray:
-; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -153,18 +295,34 @@ main_body:
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}store_1d:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2d:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %t = extractelement <2 x i16> %coords, i32 1
@@ -172,9 +330,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_3d:
-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -183,9 +349,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_cube:
-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -194,9 +368,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1darray:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %slice = extractelement <2 x i16> %coords, i32 1
@@ -204,9 +386,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2darray:
-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -215,9 +405,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2dmsaa:
-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_2dmsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -226,9 +424,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2darraymsaa:
-; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_2darraymsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -238,9 +444,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_1d:
-; GCN: image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_mip_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %mip = extractelement <2 x i16> %coords, i32 1
@@ -248,9 +462,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_2d:
-; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -259,9 +481,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_3d:
-; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -271,9 +501,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_cube:
-; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -283,9 +521,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_1darray:
-; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %slice = extractelement <2 x i16> %coords_lo, i32 1
@@ -294,9 +540,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_2darray:
-; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords_lo, i32 0
   %t = extractelement <2 x i16> %coords_lo, i32 1
@@ -306,172 +560,347 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}getresinfo_1d:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2d:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_3d:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_cube:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_1darray:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2darray:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2dmsaa:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
 define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2dmsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
-; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da{{$}}
 define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2darraymsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_V1:
-; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm a16
 define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_V1:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret float %v
 }
 
-; GCN-LABEL: {{^}}load_1d_V2:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16
 define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_V2:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret <2 x float> %v
 }
 
-; GCN-LABEL: {{^}}store_1d_V1:
-; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm a16
 define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_V1:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1d_V2:
-; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16
 define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_V2:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}load_1d_glc:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16{{$}}
 define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_glc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_slc:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16{{$}}
 define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_glc_slc:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16{{$}}
 define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_glc_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}store_1d_glc:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16{{$}}
 define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_glc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1d_slc:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16{{$}}
 define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1d_glc_slc:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16{{$}}
 define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_glc_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %s = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
   ret void
 }
 
-; GCN-LABEL: {{^}}getresinfo_dmask0:
-; GCN-NOT: image
-; GCN: ; return to shader part epilog
 define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
+; GFX9-LABEL: getresinfo_dmask0:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_dmask0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %mip = extractelement <2 x i16> %coords, i32 0
   %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
new file mode 100644
index 000000000000..af76eb3433bc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
@@ -0,0 +1,959 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s
+
+define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %t = extractelement <2 x i16> %coords, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %r = extractelement <2 x i16> %coords_hi, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %slice = extractelement <2 x i16> %coords, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_2dmsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x30,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %fragid = extractelement <2 x i16> %coords_hi, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_2darraymsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x38,0x1f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %fragid = extractelement <2 x i16> %coords_hi, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_mip_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %mip = extractelement <2 x i16> %coords, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %mip = extractelement <2 x i16> %coords_hi, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %r = extractelement <2 x i16> %coords_hi, i32 0
+  %mip = extractelement <2 x i16> %coords_hi, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %mip = extractelement <2 x i16> %coords_hi, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %slice = extractelement <2 x i16> %coords_lo, i32 1
+  %mip = extractelement <2 x i16> %coords_hi, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: load_mip_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x04,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %mip = extractelement <2 x i16> %coords_hi, i32 1
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %t = extractelement <2 x i16> %coords, i32 1
+  call void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %r = extractelement <2 x i16> %coords_hi, i32 0
+  call void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  call void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %slice = extractelement <2 x i16> %coords, i32 1
+  call void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  call void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_2dmsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x30,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %fragid = extractelement <2 x i16> %coords_hi, i32 0
+  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_2darraymsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x38,0x1f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %fragid = extractelement <2 x i16> %coords_hi, i32 1
+  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_mip_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v4, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_mip_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %mip = extractelement <2 x i16> %coords, i32 1
+  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_mip_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %mip = extractelement <2 x i16> %coords_hi, i32 0
+  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_mip_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %r = extractelement <2 x i16> %coords_hi, i32 0
+  %mip = extractelement <2 x i16> %coords_hi, i32 1
+  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %r, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_mip_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %mip = extractelement <2 x i16> %coords_hi, i32 1
+  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_mip_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %slice = extractelement <2 x i16> %coords_lo, i32 1
+  %mip = extractelement <2 x i16> %coords_hi, i32 0
+  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
+; GFX9-LABEL: store_mip_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_mip_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x24,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords_lo, i32 0
+  %t = extractelement <2 x i16> %coords_lo, i32 1
+  %slice = extractelement <2 x i16> %coords_hi, i32 0
+  %mip = extractelement <2 x i16> %coords_hi, i32 1
+  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %slice, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 ; encoding: [0x08,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 ; encoding: [0x10,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm a16 ; encoding: [0x18,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm a16 ; encoding: [0x20,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm a16 ; encoding: [0x28,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2dmsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 ; encoding: [0x00,0x9f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm a16 ; encoding: [0x30,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: getresinfo_2darraymsaa:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm a16 da ; encoding: [0x00,0xdf,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 ; encoding: [0x38,0x1f,0x38,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 15, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_V1:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm a16 ; encoding: [0x00,0x98,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x18,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call float @llvm.amdgcn.image.load.1d.f32.i16(i32 8, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret float %v
+}
+
+define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_V2:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm a16 ; encoding: [0x00,0x99,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x19,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 9, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <2 x float> %v
+}
+
+define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_V1:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm a16 ; encoding: [0x00,0x92,0x20,0xf0,0x01,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x12,0x20,0xf0,0x01,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.f32.i16(float %vdata, i32 2, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_V2:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm a16 ; encoding: [0x00,0x9c,0x20,0xf0,0x02,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm a16 ; encoding: [0x00,0x1c,0x20,0xf0,0x02,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float> %vdata, i32 12, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_glc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc a16 ; encoding: [0x00,0xbf,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 ; encoding: [0x00,0x3f,0x00,0xf0,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc a16 ; encoding: [0x00,0x9f,0x00,0xf2,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16 ; encoding: [0x00,0x1f,0x00,0xf2,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
+; GFX9-LABEL: load_1d_glc_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc a16 ; encoding: [0x00,0xbf,0x00,0xf2,0x00,0x00,0x00,0x00]
+; GFX9-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16 ; encoding: [0x00,0x3f,0x00,0xf2,0x00,0x00,0x00,0x40]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  ret <4 x float> %v
+}
+
+define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_glc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc a16 ; encoding: [0x00,0xbf,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 ; encoding: [0x00,0x3f,0x20,0xf0,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 1)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc a16 ; encoding: [0x00,0x9f,0x20,0xf2,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc a16 ; encoding: [0x00,0x1f,0x20,0xf2,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) {
+; GFX9-LABEL: store_1d_glc_slc:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc a16 ; encoding: [0x00,0xbf,0x20,0xf2,0x04,0x00,0x00,0x00]
+; GFX9-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
+; GFX10-LABEL: store_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc a16 ; encoding: [0x00,0x3f,0x20,0xf2,0x04,0x00,0x00,0x40]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+main_body:
+  %s = extractelement <2 x i16> %coords, i32 0
+  call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, <8 x i32> %rsrc, i32 0, i32 3)
+  ret void
+}
+
+define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, <2 x i16> %coords) #0 {
+; GFX9-LABEL: getresinfo_dmask0:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_dmask0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    ; return to shader part epilog
+main_body:
+  %mip = extractelement <2 x i16> %coords, i32 0
+  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 0, i16 %mip, <8 x i32> %rsrc, i32 0, i32 0)
+  ret <4 x float> %r
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
+
+declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
+
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
+
+declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
+declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
+declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
+declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
index bc3b01a0f062..8fb62c26e3f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -1,222 +1,379 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
 
 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
-; GCN-LABEL: gather4_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
-; GCN-LABEL: gather4_cube:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
-; GCN-LABEL: gather4_2darray:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
-; GCN-LABEL: gather4_c_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_c_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_c_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
-; GCN-LABEL: gather4_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
-; GCN-LABEL: gather4_c_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_mov_b32_e32 v5, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_c_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_c_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
-; GCN-LABEL: gather4_b_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_b_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_b_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
-; GCN-LABEL: gather4_c_b_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GCN-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_c_b_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_c_b_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
-; GCN-LABEL: gather4_b_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_mov_b32_e32 v5, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_b_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_b_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
-; GCN-LABEL: gather4_c_b_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_mov_b32_e32 v7, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GCN-NEXT:    v_mov_b32_e32 v5, v1
-; GCN-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_c_b_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v7, v4
+; GFX9-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
+; GFX9-NEXT:    v_mov_b32_e32 v5, v1
+; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_c_b_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
-; GCN-LABEL: gather4_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_l_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_l_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
-; GCN-LABEL: gather4_c_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v5, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
-; GCN-NEXT:    image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_c_l_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
+; GFX9-NEXT:    image_gather4_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_c_l_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_gather4_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
-; GCN-LABEL: gather4_lz_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_lz_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_lz_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
-; GCN-LABEL: gather4_c_lz_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: gather4_c_lz_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: gather4_c_lz_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
index fd2c6e796c81..24e6e46ccc39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
@@ -1,7 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}load.f16.1d:
-; GCN: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
+; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
+; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
 define amdgpu_ps <4 x half> @load.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -10,7 +12,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v2f16.1d:
-; GCN: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
+; GFX9: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
+; GFX10: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -19,7 +22,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v3f16.1d:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
+; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -28,7 +32,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v4f16.1d:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
+; GFX9: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -37,7 +42,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.f16.2d:
-; GCN: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
+; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16 d16
+; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
 define amdgpu_ps <4 x half> @load.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -47,7 +53,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v2f16.2d:
-; GCN: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
+; GFX9: image_load v0, v0, s[0:7] dmask:0x3 unorm a16 d16
+; GFX10: image_load v0, v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -57,7 +64,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v3f16.2d:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
+; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm a16 d16
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -67,7 +75,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v4f16.2d:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
+; GFX9: image_load v[0:1], v0, s[0:7] dmask:0xf unorm a16 d16
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -77,7 +86,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.f16.3d:
-; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16 d16
+; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16 d16
+; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
 define amdgpu_ps <4 x half> @load.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
@@ -88,7 +98,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v2f16.3d:
-; GCN: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm a16 d16
+; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm a16 d16
+; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
@@ -99,7 +110,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v3f16.3d:
-; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
+; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
+; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
@@ -110,7 +122,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v4f16.3d:
-; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm a16 d16
+; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm a16 d16
+; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
 define amdgpu_ps <4 x half> @load.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
index b05b85eaa4fd..42f26ef57d5f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
@@ -1,7 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}load.f32.1d:
-; GCN: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
+; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
+; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
 define amdgpu_ps <4 x float> @load.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -10,7 +12,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v2f32.1d:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
+; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
 define amdgpu_ps <4 x float> @load.v2f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -19,7 +22,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v3f32.1d:
-; GCN: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
+; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
+; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
 define amdgpu_ps <4 x float> @load.v3f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -28,7 +32,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v4f32.1d:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
 define amdgpu_ps <4 x float> @load.v4f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -37,7 +42,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.f32.2d:
-; GCN: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
+; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
+; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
 define amdgpu_ps <4 x float> @load.f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -47,7 +53,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v2f32.2d:
-; GCN: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
+; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
+; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
 define amdgpu_ps <4 x float> @load.v2f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -57,7 +64,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v3f32.2d:
-; GCN: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
+; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
+; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
 define amdgpu_ps <4 x float> @load.v3f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -67,7 +75,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v4f32.2d:
-; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
+; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
 define amdgpu_ps <4 x float> @load.v4f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
@@ -77,7 +86,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.f32.3d:
-; GCN: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16
+; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16
+; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
 define amdgpu_ps <4 x float> @load.f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
@@ -88,7 +98,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v2f32.3d:
-; GCN: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm a16
+; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm a16
+; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
 define amdgpu_ps <4 x float> @load.v2f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
@@ -99,7 +110,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v3f32.3d:
-; GCN: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm a16
+; GFX9: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm a16
+; GFX10: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
 define amdgpu_ps <4 x float> @load.v3f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
@@ -110,7 +122,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}load.v4f32.3d:
-; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX9: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
 define amdgpu_ps <4 x float> @load.v4f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
index 2b6435647554..2a132ce2ccd9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -1,732 +1,1252 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
+
 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
-; GCN-LABEL: sample_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
-; GCN-LABEL: sample_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
-; GCN-LABEL: sample_3d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
-; GCN-LABEL: sample_cube:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cube:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
-; GCN-LABEL: sample_1darray:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_1darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
-; GCN-LABEL: sample_2darray:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_2darray:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
-; GCN-LABEL: sample_c_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
-; GCN-LABEL: sample_c_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
-; GCN-LABEL: sample_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
-; GCN-LABEL: sample_c_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_c_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_mov_b32_e32 v5, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
-; GCN-LABEL: sample_b_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_b_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
-; GCN-LABEL: sample_b_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_b_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
-; GCN-LABEL: sample_c_b_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_b_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
-; GCN-LABEL: sample_c_b_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GCN-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_b_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
-; GCN-LABEL: sample_b_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_b_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_b_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_mov_b32_e32 v5, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_b_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_b_cl v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
-; GCN-LABEL: sample_c_b_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GCN-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_b_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_c_b_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b64 s[12:13], exec
-; GCN-NEXT:    s_wqm_b64 exec, exec
-; GCN-NEXT:    v_mov_b32_e32 v7, v4
-; GCN-NEXT:    v_mov_b32_e32 v4, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GCN-NEXT:    v_mov_b32_e32 v5, v1
-; GCN-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
-; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
-; GCN-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_b_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v7, v4
+; GFX9-NEXT:    v_mov_b32_e32 v4, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
+; GFX9-NEXT:    v_mov_b32_e32 v5, v1
+; GFX9-NEXT:    v_lshl_or_b32 v6, v3, 16, v0
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_c_b_cl v[0:3], v[4:7], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
-; GCN-LABEL: sample_d_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_d_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
-; GCN-LABEL: sample_d_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v6, 0xffff
-; GCN-NEXT:    v_and_b32_e32 v4, v6, v4
-; GCN-NEXT:    v_and_b32_e32 v2, v6, v2
-; GCN-NEXT:    v_and_b32_e32 v0, v6, v0
-; GCN-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
-; GCN-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
-; GCN-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
-; GCN-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_d_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
+; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
+; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
+; GFX9-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
+; GFX10-NEXT:    image_sample_d v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
-; GCN-LABEL: sample_d_3d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v12, v8
-; GCN-NEXT:    v_mov_b32_e32 v8, v2
-; GCN-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GCN-NEXT:    v_mov_b32_e32 v10, v5
-; GCN-NEXT:    v_and_b32_e32 v5, v2, v6
-; GCN-NEXT:    v_and_b32_e32 v3, v2, v3
-; GCN-NEXT:    v_and_b32_e32 v0, v2, v0
-; GCN-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
-; GCN-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
-; GCN-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
-; GCN-NEXT:    image_sample_d v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_d_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v12, v8
+; GFX9-NEXT:    v_mov_b32_e32 v8, v2
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v10, v5
+; GFX9-NEXT:    v_and_b32_e32 v5, v2, v6
+; GFX9-NEXT:    v_and_b32_e32 v3, v2, v3
+; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
+; GFX9-NEXT:    v_lshl_or_b32 v11, v7, 16, v5
+; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
+; GFX9-NEXT:    v_lshl_or_b32 v7, v1, 16, v0
+; GFX9-NEXT:    image_sample_d v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v6, v9, v6
+; GFX10-NEXT:    v_and_b32_e32 v3, v9, v3
+; GFX10-NEXT:    v_and_b32_e32 v0, v9, v0
+; GFX10-NEXT:    v_lshl_or_b32 v6, v7, 16, v6
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_d v[0:3], [v0, v2, v3, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
-; GCN-LABEL: sample_c_d_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_d_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
-; GCN-LABEL: sample_c_d_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v9, 0xffff
-; GCN-NEXT:    v_mov_b32_e32 v8, v2
-; GCN-NEXT:    v_mov_b32_e32 v7, v3
-; GCN-NEXT:    v_and_b32_e32 v2, v9, v5
-; GCN-NEXT:    v_and_b32_e32 v1, v9, v1
-; GCN-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
-; GCN-NEXT:    v_and_b32_e32 v2, v9, v7
-; GCN-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
-; GCN-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
-; GCN-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_d_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v8, v2
+; GFX9-NEXT:    v_mov_b32_e32 v7, v3
+; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
+; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
+; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
+; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
+; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
+; GFX9-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v10, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v3, v10, v3
+; GFX10-NEXT:    v_and_b32_e32 v1, v10, v1
+; GFX10-NEXT:    v_and_b32_e32 v5, v10, v5
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    v_lshl_or_b32 v6, v6, 16, v5
+; GFX10-NEXT:    image_sample_c_d v[0:3], [v0, v1, v3, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
-; GCN-LABEL: sample_d_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GCN-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GCN-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_d_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX9-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_d_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v7, 0xffff
-; GCN-NEXT:    v_and_b32_e32 v4, v7, v4
-; GCN-NEXT:    v_and_b32_e32 v2, v7, v2
-; GCN-NEXT:    v_and_b32_e32 v0, v7, v0
-; GCN-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
-; GCN-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
-; GCN-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
-; GCN-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_d_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
+; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
+; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
+; GFX9-NEXT:    image_sample_d_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
+; GFX10-NEXT:    image_sample_d_cl v[0:3], [v0, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
-; GCN-LABEL: sample_c_d_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GCN-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
-; GCN-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_d_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_c_d_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v11, v7
-; GCN-NEXT:    v_mov_b32_e32 v7, v0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0xffff
-; GCN-NEXT:    v_and_b32_e32 v5, v0, v5
-; GCN-NEXT:    v_and_b32_e32 v3, v0, v3
-; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
-; GCN-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
-; GCN-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
-; GCN-NEXT:    image_sample_c_d_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_d_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v11, v7
+; GFX9-NEXT:    v_mov_b32_e32 v7, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
+; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
+; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
+; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
+; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
+; GFX9-NEXT:    image_sample_c_d_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
+; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
+; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
+; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    v_lshl_or_b32 v6, v4, 16, v3
+; GFX10-NEXT:    image_sample_c_d_cl v[0:3], [v0, v1, v6, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
-; GCN-LABEL: sample_cd_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cd_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
-; GCN-LABEL: sample_cd_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v6, 0xffff
-; GCN-NEXT:    v_and_b32_e32 v4, v6, v4
-; GCN-NEXT:    v_and_b32_e32 v2, v6, v2
-; GCN-NEXT:    v_and_b32_e32 v0, v6, v0
-; GCN-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
-; GCN-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
-; GCN-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
-; GCN-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cd_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v4, v6, v4
+; GFX9-NEXT:    v_and_b32_e32 v2, v6, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, v6, v0
+; GFX9-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
+; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
+; GFX10-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
-; GCN-LABEL: sample_c_cd_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_cd_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
-; GCN-LABEL: sample_c_cd_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v9, 0xffff
-; GCN-NEXT:    v_mov_b32_e32 v8, v2
-; GCN-NEXT:    v_mov_b32_e32 v7, v3
-; GCN-NEXT:    v_and_b32_e32 v2, v9, v5
-; GCN-NEXT:    v_and_b32_e32 v1, v9, v1
-; GCN-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
-; GCN-NEXT:    v_and_b32_e32 v2, v9, v7
-; GCN-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
-; GCN-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
-; GCN-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_cd_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v8, v2
+; GFX9-NEXT:    v_mov_b32_e32 v7, v3
+; GFX9-NEXT:    v_and_b32_e32 v2, v9, v5
+; GFX9-NEXT:    v_and_b32_e32 v1, v9, v1
+; GFX9-NEXT:    v_lshl_or_b32 v3, v6, 16, v2
+; GFX9-NEXT:    v_and_b32_e32 v2, v9, v7
+; GFX9-NEXT:    v_lshl_or_b32 v2, v4, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v8, 16, v1
+; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v10, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v3, v10, v3
+; GFX10-NEXT:    v_and_b32_e32 v1, v10, v1
+; GFX10-NEXT:    v_and_b32_e32 v5, v10, v5
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    v_lshl_or_b32 v6, v6, 16, v5
+; GFX10-NEXT:    image_sample_c_cd v[0:3], [v0, v1, v3, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
-; GCN-LABEL: sample_cd_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; GCN-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GCN-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cd_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_cd_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v7, 0xffff
-; GCN-NEXT:    v_and_b32_e32 v4, v7, v4
-; GCN-NEXT:    v_and_b32_e32 v2, v7, v2
-; GCN-NEXT:    v_and_b32_e32 v0, v7, v0
-; GCN-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
-; GCN-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
-; GCN-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
-; GCN-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_cd_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v4, v7, v4
+; GFX9-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
+; GFX9-NEXT:    v_lshl_or_b32 v4, v3, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v3, v1, 16, v0
+; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v0, v7, v0
+; GFX10-NEXT:    v_and_b32_e32 v4, v7, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v7, v2
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v3, v3, 16, v2
+; GFX10-NEXT:    image_sample_cd_cl v[0:3], [v0, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
-; GCN-LABEL: sample_c_cd_cl_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v3, 0xffff, v3
-; GCN-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
-; GCN-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_cd_cl_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX9-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
+; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
-; GCN-LABEL: sample_c_cd_cl_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v11, v7
-; GCN-NEXT:    v_mov_b32_e32 v7, v0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0xffff
-; GCN-NEXT:    v_and_b32_e32 v5, v0, v5
-; GCN-NEXT:    v_and_b32_e32 v3, v0, v3
-; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
-; GCN-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
-; GCN-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
-; GCN-NEXT:    image_sample_c_cd_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_cd_cl_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v11, v7
+; GFX9-NEXT:    v_mov_b32_e32 v7, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v5, v0, v5
+; GFX9-NEXT:    v_and_b32_e32 v3, v0, v3
+; GFX9-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX9-NEXT:    v_lshl_or_b32 v10, v6, 16, v5
+; GFX9-NEXT:    v_lshl_or_b32 v9, v4, 16, v3
+; GFX9-NEXT:    v_lshl_or_b32 v8, v2, 16, v0
+; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:14], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v5, v8, v5
+; GFX10-NEXT:    v_and_b32_e32 v1, v8, v1
+; GFX10-NEXT:    v_and_b32_e32 v3, v8, v3
+; GFX10-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    v_lshl_or_b32 v6, v4, 16, v3
+; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], [v0, v1, v6, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
-; GCN-LABEL: sample_l_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_l_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_l_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
-; GCN-LABEL: sample_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
-; GCN-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_l_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX9-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_l_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v1, 16, v0
+; GFX10-NEXT:    image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
-; GCN-LABEL: sample_c_l_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_l_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_l_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
-; GCN-LABEL: sample_c_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v5, v3
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
-; GCN-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_l_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    v_mov_b32_e32 v3, v0
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v4, v2, 16, v0
+; GFX9-NEXT:    image_sample_c_l v[0:3], v[3:5], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_l_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
-; GCN-LABEL: sample_lz_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_lz_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_lz_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
-; GCN-LABEL: sample_lz_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GCN-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GCN-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_lz_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_lz_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
-; GCN-LABEL: sample_c_lz_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_lz_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_lz_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
-; GCN-LABEL: sample_c_lz_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GCN-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
-; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_lz_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_lz_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
-; GCN-LABEL: sample_c_d_o_2darray_V1:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v13, v8
-; GCN-NEXT:    v_mov_b32_e32 v8, v0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0xffff
-; GCN-NEXT:    v_mov_b32_e32 v9, v1
-; GCN-NEXT:    v_and_b32_e32 v1, v0, v6
-; GCN-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
-; GCN-NEXT:    v_and_b32_e32 v1, v0, v4
-; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
-; GCN-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
-; GCN-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
-; GCN-NEXT:    image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_d_o_2darray_V1:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v13, v8
+; GFX9-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v9, v1
+; GFX9-NEXT:    v_and_b32_e32 v1, v0, v6
+; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
+; GFX9-NEXT:    v_and_b32_e32 v1, v0, v4
+; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
+; GFX9-NEXT:    image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_o_2darray_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v4, v9, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v9, v2
+; GFX10-NEXT:    v_and_b32_e32 v6, v9, v6
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v7, v7, 16, v6
+; GFX10-NEXT:    image_sample_c_d_o v0, [v0, v1, v2, v4, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret float %v
 }
 
 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
-; GCN-LABEL: sample_c_d_o_2darray_V2:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    v_mov_b32_e32 v13, v8
-; GCN-NEXT:    v_mov_b32_e32 v8, v0
-; GCN-NEXT:    v_mov_b32_e32 v0, 0xffff
-; GCN-NEXT:    v_mov_b32_e32 v9, v1
-; GCN-NEXT:    v_and_b32_e32 v1, v0, v6
-; GCN-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
-; GCN-NEXT:    v_and_b32_e32 v1, v0, v4
-; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
-; GCN-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
-; GCN-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
-; GCN-NEXT:    image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 a16 da
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
+; GFX9-LABEL: sample_c_d_o_2darray_V2:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v13, v8
+; GFX9-NEXT:    v_mov_b32_e32 v8, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v9, v1
+; GFX9-NEXT:    v_and_b32_e32 v1, v0, v6
+; GFX9-NEXT:    v_lshl_or_b32 v12, v7, 16, v1
+; GFX9-NEXT:    v_and_b32_e32 v1, v0, v4
+; GFX9-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX9-NEXT:    v_lshl_or_b32 v11, v5, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v10, v3, 16, v0
+; GFX9-NEXT:    image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 a16 da
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_o_2darray_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_and_b32_e32 v4, v9, v4
+; GFX10-NEXT:    v_and_b32_e32 v2, v9, v2
+; GFX10-NEXT:    v_and_b32_e32 v6, v9, v6
+; GFX10-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX10-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT:    v_lshl_or_b32 v7, v7, 16, v6
+; GFX10-NEXT:    image_sample_c_d_o v[0:1], [v0, v1, v2, v4, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <2 x float> %v

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
index 48d26f7db20a..488a07bf3731 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
@@ -1,8 +1,18 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
 
-; GCN-LABEL: {{^}}store.f16.1d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
-define amdgpu_ps void @store.f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_f16_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_f16_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %bitcast = bitcast <2 x i32> %val to <4 x half>
@@ -10,9 +20,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v2f16.1d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
-define amdgpu_ps void @store.v2f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_v2f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_v2f16_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v2f16_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %bitcast = bitcast <2 x i32> %val to <4 x half>
@@ -20,9 +38,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v3f16.1d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
-define amdgpu_ps void @store.v3f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_v3f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_v3f16_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v3f16_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %bitcast = bitcast <2 x i32> %val to <4 x half>
@@ -30,9 +56,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v4f16.1d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
-define amdgpu_ps void @store.v4f16.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_v4f16_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_v4f16_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v4f16_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %bitcast = bitcast <2 x i32> %val to <4 x half>
@@ -40,9 +74,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.f16.2d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
-define amdgpu_ps void @store.f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_f16_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x1 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_f16_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -51,9 +93,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v2f16.2d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
-define amdgpu_ps void @store.v2f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_v2f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_v2f16_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x3 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v2f16_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -62,9 +112,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v3f16.2d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
-define amdgpu_ps void @store.v3f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_v3f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_v3f16_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x7 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v3f16_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -73,9 +131,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v4f16.2d:
-; GCN: image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
-define amdgpu_ps void @store.v4f16.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+define amdgpu_ps void @store_v4f16_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <2 x i32> %val) {
+; GFX9-LABEL: store_v4f16_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0xf unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v4f16_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:2], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -84,9 +150,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.f16.3d:
-; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0x1 unorm a16 d16
-define amdgpu_ps void @store.f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+define amdgpu_ps void @store_f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+; GFX9-LABEL: store_f16_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x1 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_f16_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1
@@ -96,9 +170,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v2f16.3d:
-; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm a16 d16
-define amdgpu_ps void @store.v2f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+define amdgpu_ps void @store_v2f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+; GFX9-LABEL: store_v2f16_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v2f16_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1
@@ -108,9 +190,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v3f16.3d:
-; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
-define amdgpu_ps void @store.v3f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+define amdgpu_ps void @store_v3f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+; GFX9-LABEL: store_v3f16_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v3f16_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1
@@ -120,9 +210,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v4f16.3d:
-; GCN: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm a16 d16
-define amdgpu_ps void @store.v4f16.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+define amdgpu_ps void @store_v4f16_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <2 x i32> %val) {
+; GFX9-LABEL: store_v4f16_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm a16 d16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v4f16_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 d16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
index f5ec31ba7815..1217d295ad32 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
@@ -1,44 +1,86 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
 
-; GCN-LABEL: {{^}}store.f32.1d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16
-define amdgpu_ps void @store.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_f32_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_f32_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 1, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v2f32.1d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16
-define amdgpu_ps void @store.v2f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_v2f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_v2f32_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v2f32_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 3, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v3f32.1d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16
-define amdgpu_ps void @store.v3f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_v3f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_v3f32_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v3f32_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 7, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v4f32.1d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16
-define amdgpu_ps void @store.v4f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_v4f32_1d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_v4f32_1d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v4f32_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   call void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float> %val, i32 15, i16 %x, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store.f32.2d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16
-define amdgpu_ps void @store.f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_f32_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_f32_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -46,9 +88,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v2f32.2d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16
-define amdgpu_ps void @store.v2f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_v2f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_v2f32_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v2f32_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -56,9 +106,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v3f32.2d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16
-define amdgpu_ps void @store.v3f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_v3f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_v3f32_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v3f32_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -66,9 +124,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v4f32.2d:
-; GCN: image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16
-define amdgpu_ps void @store.v4f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+define amdgpu_ps void @store_v4f32_2d(<8 x i32> inreg %rsrc, <2 x i16> %coords, <4 x float> %val) {
+; GFX9-LABEL: store_v4f32_2d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v4f32_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[1:4], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords, i32 0
   %y = extractelement <2 x i16> %coords, i32 1
@@ -76,9 +142,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.f32.3d:
-; GCN: image_store v[2:5], v[0:1], s[0:7] dmask:0x1 unorm a16
-define amdgpu_ps void @store.f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+define amdgpu_ps void @store_f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+; GFX9-LABEL: store_f32_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x1 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_f32_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1
@@ -87,9 +161,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v2f32.3d:
-; GCN: image_store v[2:5], v[0:1], s[0:7] dmask:0x3 unorm a16
-define amdgpu_ps void @store.v2f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+define amdgpu_ps void @store_v2f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+; GFX9-LABEL: store_v2f32_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x3 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v2f32_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1
@@ -98,9 +180,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v3f32.3d:
-; GCN: image_store v[2:5], v[0:1], s[0:7] dmask:0x7 unorm a16
-define amdgpu_ps void @store.v3f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+define amdgpu_ps void @store_v3f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+; GFX9-LABEL: store_v3f32_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x7 unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v3f32_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1
@@ -109,9 +199,17 @@ main_body:
   ret void
 }
 
-; GCN-LABEL: {{^}}store.v4f32.3d:
-; GCN: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm a16
-define amdgpu_ps void @store.v4f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+define amdgpu_ps void @store_v4f32_3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi, <4 x float> %val) {
+; GFX9-LABEL: store_v4f32_3d:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm a16
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_v4f32_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
+; GFX10-NEXT:    s_endpgm
 main_body:
   %x = extractelement <2 x i16> %coords_lo, i32 0
   %y = extractelement <2 x i16> %coords_lo, i32 1

diff  --git a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
index 09ae71db38fa..4ad185e037f0 100644
--- a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
+++ b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir
@@ -19,9 +19,9 @@ body:             |
     renamable $sgpr8 = COPY killed renamable $sgpr2
     renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0, 0 :: (invariant load 32, align 16, addrspace 4)
     renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
-    renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16)
+    renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16)
     S_ENDPGM 0
 
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
index 39d3efe2a1de..5829cf60a58b 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
+++ b/llvm/test/CodeGen/AMDGPU/nsa-vmem-hazard.mir
@@ -8,7 +8,7 @@
 name:            hazard_image_sample_d_buf_off6
 body:            |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...
 
@@ -19,7 +19,7 @@ body:            |
 name:            no_hazard_image_sample_d_buf_off1
 body:            |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, 0, implicit $exec
 ...
 
@@ -31,7 +31,7 @@ body:            |
 name:            no_hazard_image_sample_d_buf_far
 body:            |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     V_NOP_e32 implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...
@@ -44,7 +44,7 @@ body:            |
 name:            no_hazard_image_sample_v4_v2_buf_off6
 body:            |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...
 
@@ -56,6 +56,6 @@ body:            |
 name:            no_hazard_image_sample_v4_v3_buf_off6
 body:            |
   bb.0:
-    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
 ...