[llvm] r335222 - AMDGPU: Turn D16 for MIMG instructions into a regular operand

Nicolai Haehnle via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 21 06:36:01 PDT 2018


Author: nha
Date: Thu Jun 21 06:36:01 2018
New Revision: 335222

URL: http://llvm.org/viewvc/llvm-project?rev=335222&view=rev
Log:
AMDGPU: Turn D16 for MIMG instructions into a regular operand

Summary:
This allows us to reduce the number of different machine instruction
opcodes, which reduces the table sizes and helps flatten the TableGen
multiclass hierarchies.

We can do this because for each hardware MIMG opcode, we have a full set
of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata
and vaddr registers. Instead of having separate D16 machine instructions,
a packed D16 instruction loading e.g. 4 components can simply use the
same V2 opcode variant that non-D16 instructions use.

We still require a TSFlag for D16 buffer instructions, because the
D16-ness of buffer instructions is part of the opcode. Renaming the flag
should help avoid future confusion.

The one non-obvious code change is that for gather4 instructions, the
disassembler can no longer automatically decide whether to use a V2 or
a V4 variant. The existing logic which chooses the correct variant for
other MIMG instructions is extended to cover gather4 as well.

As a bonus, some of the assembler error messages are now more helpful
(e.g., complaining about a wrong data size instead of a non-existing
instruction).

While we're at it, delete a whole bunch of dead legacy TableGen code.

Change-Id: I89b02c2841c06f95e662541433e597f5d4553978

Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor

Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D47434

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
    llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
    llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
    llvm/trunk/lib/Target/AMDGPU/SIDefines.h
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
    llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
    llvm/trunk/test/MC/AMDGPU/mimg.s

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Thu Jun 21 06:36:01 2018
@@ -109,8 +109,7 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(in
   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
   // subtarget has UnpackedD16VMem feature.
   // TODO: remove this when we discard GFX80 encoding.
-  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
-                              && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
+  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
     Gen = SIEncodingFamily::GFX80;
 
   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Thu Jun 21 06:36:01 2018
@@ -2301,10 +2301,6 @@ bool AMDGPUAsmParser::validateMIMGDataSi
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
 
-  // Gather4 instructions do not need validation: dst size is hardcoded.
-  if (Desc.TSFlags & SIInstrFlags::Gather4)
-    return true;
-
   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
@@ -2319,9 +2315,12 @@ bool AMDGPUAsmParser::validateMIMGDataSi
   if (DMask == 0)
     DMask = 1;
 
-  unsigned DataSize = countPopulation(DMask);
-  if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
-    DataSize = (DataSize + 1) / 2;
+  unsigned DataSize =
+    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
+  if (hasPackedD16()) {
+    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
+      DataSize = (DataSize + 1) / 2;
   }
 
   return (VDataSize / 4) == DataSize + TFESize;
@@ -2389,10 +2388,14 @@ bool AMDGPUAsmParser::validateMIMGD16(co
 
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
-  if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
-    return true;
 
-  return !isCI() && !isSI();
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
+    if (isCI() || isSI())
+      return false;
+  }
+
+  return true;
 }
 
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
@@ -4261,6 +4264,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &In
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
 }
 
 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
@@ -4287,6 +4291,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defa
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultD16() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyD16);
+}
+
 //===----------------------------------------------------------------------===//
 // smrd
 //===----------------------------------------------------------------------===//
@@ -4389,6 +4397,7 @@ static const OptionalOperand AMDGPUOptio
   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
+  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
@@ -5094,8 +5103,6 @@ unsigned AMDGPUAsmParser::validateTarget
     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
   case MCK_glc:
     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
-  case MCK_d16:
-    return Operand.isD16() ? Match_Success : Match_InvalidOperand;
   case MCK_idxen:
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:

Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Thu Jun 21 06:36:01 2018
@@ -720,7 +720,7 @@ defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Ps
   "buffer_store_format_xyzw", VReg_128
 >;
 
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
   defm BUFFER_LOAD_FORMAT_D16_X_gfx80 : MUBUF_Pseudo_Loads <
     "buffer_load_format_d16_x", VGPR_32
   >;
@@ -747,7 +747,7 @@ let SubtargetPredicate = HasUnpackedD16V
   >;
 } // End HasUnpackedD16VMem.
 
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Pseudo_Loads <
     "buffer_load_format_d16_x", VGPR_32
   >;
@@ -990,7 +990,7 @@ defm TBUFFER_STORE_FORMAT_XY   : MTBUF_P
 defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz",  VReg_128>;
 defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
 
-let SubtargetPredicate = HasUnpackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_LOAD_FORMAT_D16_X_gfx80     : MTBUF_Pseudo_Loads  <"tbuffer_load_format_d16_x",     VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80    : MTBUF_Pseudo_Loads  <"tbuffer_load_format_d16_xy",    VReg_64>;
   defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80   : MTBUF_Pseudo_Loads  <"tbuffer_load_format_d16_xyz",   VReg_96>;
@@ -1001,7 +1001,7 @@ let SubtargetPredicate = HasUnpackedD16V
   defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
 } // End HasUnpackedD16VMem.
 
-let SubtargetPredicate = HasPackedD16VMem, D16 = 1 in {
+let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
   defm TBUFFER_LOAD_FORMAT_D16_X     : MTBUF_Pseudo_Loads  <"tbuffer_load_format_d16_x",     VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XY    : MTBUF_Pseudo_Loads  <"tbuffer_load_format_d16_xy",    VGPR_32>;
   defm TBUFFER_LOAD_FORMAT_D16_XYZ   : MTBUF_Pseudo_Loads  <"tbuffer_load_format_d16_xyz",   VReg_64>;

Modified: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp Thu Jun 21 06:36:01 2018
@@ -289,10 +289,6 @@ DecodeStatus AMDGPUDisassembler::convert
 // as if it has 1 dword, which could be not really so.
 DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
 
-  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
-    return MCDisassembler::Success;
-  }
-
   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdst);
 
@@ -304,22 +300,25 @@ DecodeStatus AMDGPUDisassembler::convert
 
   int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::tfe);
+  int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                            AMDGPU::OpName::d16);
 
   assert(VDataIdx != -1);
   assert(DMaskIdx != -1);
   assert(TFEIdx != -1);
 
   bool IsAtomic = (VDstIdx != -1);
+  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
 
   unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
   if (DMask == 0)
     return MCDisassembler::Success;
 
-  unsigned DstSize = countPopulation(DMask);
+  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
   if (DstSize == 1)
     return MCDisassembler::Success;
 
-  bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
+  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
   if (D16 && AMDGPU::hasPackedD16(STI)) {
     DstSize = (DstSize + 1) / 2;
   }
@@ -335,6 +334,11 @@ DecodeStatus AMDGPUDisassembler::convert
       NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
     }
     if (NewOpcode == -1) return MCDisassembler::Success;
+  } else if (IsGather4) {
+    if (D16 && AMDGPU::hasPackedD16(STI))
+      NewOpcode = AMDGPU::getMIMGGatherOpPackedD16(MI.getOpcode());
+    else
+      return MCDisassembler::Success;
   } else {
     NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
     assert(NewOpcode != -1 && "could not find matching mimg channel instruction");

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Thu Jun 21 06:36:01 2018
@@ -217,6 +217,11 @@ void AMDGPUInstPrinter::printLWE(const M
   printNamedBit(MI, OpNo, O, "lwe");
 }
 
+void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
+                                 const MCSubtargetInfo &STI, raw_ostream &O) {
+  printNamedBit(MI, OpNo, O, "d16");
+}
+
 void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Thu Jun 21 06:36:01 2018
@@ -84,6 +84,8 @@ private:
                  raw_ostream &O);
   void printLWE(const MCInst *MI, unsigned OpNo,
                 const MCSubtargetInfo &STI, raw_ostream &O);
+  void printD16(const MCInst *MI, unsigned OpNo,
+                const MCSubtargetInfo &STI, raw_ostream &O);
   void printExpCompr(const MCInst *MI, unsigned OpNo,
                      const MCSubtargetInfo &STI, raw_ostream &O);
   void printExpVM(const MCInst *MI, unsigned OpNo,

Modified: llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/MIMGInstructions.td Thu Jun 21 06:36:01 2018
@@ -17,6 +17,11 @@ class MIMG_Atomic_Size <string op, bit i
   int AtomicSize = !if(is32Bit, 1, 2);
 }
 
+class MIMG_Gather_Size <string op, int channels> {
+  string Op = op;
+  int Channels = channels;
+}
+
 class mimg <bits<7> si, bits<7> vi = si> {
   field bits<7> SI = si;
   field bits<7> VI = vi;
@@ -37,125 +42,88 @@ class MIMG_Helper <dag outs, dag ins, st
 class MIMG_NoSampler_Helper <bits<7> op, string asm,
                              RegisterClass dst_rc,
                              RegisterClass addr_rc,
-                             bit d16_bit=0,
-                             string dns=""> : MIMG_Helper <
-  (outs dst_rc:$vdata),
-  (ins addr_rc:$vaddr, SReg_256:$srsrc,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  dns>, MIMGe<op> {
+                             bit has_d16,
+                             string dns="">
+  : MIMG_Helper <(outs dst_rc:$vdata),
+                 !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                   #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe<op> {
   let ssamp = 0;
-  let D16 = d16;
-}
 
-multiclass MIMG_NoSampler_Src_Helper_Helper <bits<7> op, string asm,
-                                             RegisterClass dst_rc,
-                                             int channels, bit d16_bit,
-                                             string suffix> {
-  def NAME # _V1 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, d16_bit,
-                                                   !if(!eq(channels, 1), "AMDGPU", "")>,
-                            MIMG_Mask<asm#"_V1"#suffix, channels>;
-  def NAME # _V2 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, d16_bit>,
-                            MIMG_Mask<asm#"_V2"#suffix, channels>;
-  def NAME # _V3 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, d16_bit>,
-                            MIMG_Mask<asm#"_V3"#suffix, channels>;
-  def NAME # _V4 # suffix : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, d16_bit>,
-                            MIMG_Mask<asm#"_V4"#suffix, channels>;
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
-                                      RegisterClass dst_rc,
-                                      int channels> {
-  defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm NAME : MIMG_NoSampler_Src_Helper_Helper <op, asm, dst_rc, channels, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm> {
-  defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
-  defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
-  defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
-  defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckNoSampler <bits<7> op, string asm> {
-  defm NAME # _V1 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
-  defm NAME # _V2 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
-  defm NAME # _V3 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
-  defm NAME # _V4 : MIMG_NoSampler_Src_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+                                             RegisterClass dst_rc,
+                                             int channels, bit has_d16> {
+  def NAME # _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32, has_d16,
+                                         !if(!eq(channels, 1), "AMDGPU", "")>,
+                   MIMG_Mask<asm#"_V1", channels>;
+  def NAME # _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64, has_d16>,
+                   MIMG_Mask<asm#"_V2", channels>;
+  def NAME # _V3 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_96, has_d16>,
+                   MIMG_Mask<asm#"_V3", channels>;
+  def NAME # _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128, has_d16>,
+                   MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm, bit has_d16> {
+  defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1, has_d16>;
+  defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2, has_d16>;
+  defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3, has_d16>;
+  defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4, has_d16>;
 }
 
 class MIMG_Store_Helper <bits<7> op, string asm,
                          RegisterClass data_rc,
                          RegisterClass addr_rc,
-                         bit d16_bit=0,
-                         string dns = ""> : MIMG_Helper <
-  (outs),
-  (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""), dns>, MIMGe<op> {
+                         bit has_d16,
+                         string dns = "">
+  : MIMG_Helper <(outs),
+                 !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                   #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe<op> {
   let ssamp = 0;
   let mayLoad = 0;
   let mayStore = 1;
   let hasSideEffects = 0;
   let hasPostISelHook = 0;
   let DisableWQM = 1;
-  let D16 = d16;
-}
 
-multiclass MIMG_Store_Addr_Helper_Helper <bits<7> op, string asm,
-                                  RegisterClass data_rc,
-                                  int channels, bit d16_bit,
-                                  string suffix> {
-  def NAME # _V1 # suffix : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, d16_bit,
-                                               !if(!eq(channels, 1), "AMDGPU", "")>,
-                            MIMG_Mask<asm#"_V1"#suffix, channels>;
-  def NAME # _V2 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_64, d16_bit>,
-                            MIMG_Mask<asm#"_V2"#suffix, channels>;
-  def NAME # _V3 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_96, d16_bit>,
-                            MIMG_Mask<asm#"_V3"#suffix, channels>;
-  def NAME # _V4 # suffix : MIMG_Store_Helper <op, asm, data_rc, VReg_128, d16_bit>,
-                            MIMG_Mask<asm#"_V4"#suffix, channels>;
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
                                   RegisterClass data_rc,
-                                  int channels> {
-  defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm NAME : MIMG_Store_Addr_Helper_Helper <op, asm, data_rc, channels, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_Store <bits<7> op, string asm> {
-  defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1>;
-  defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2>;
-  defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3>;
-  defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4>;
-}
-
-multiclass MIMG_PckStore <bits<7> op, string asm> {
-  defm NAME # _V1 : MIMG_Store_Addr_Helper_Helper <op, asm, VGPR_32, 1, 0, "">;
-  defm NAME # _V2 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_64, 2, 0, "">;
-  defm NAME # _V3 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_96, 3, 0, "">;
-  defm NAME # _V4 : MIMG_Store_Addr_Helper_Helper <op, asm, VReg_128, 4, 0, "">;
+                                  int channels, bit has_d16> {
+  def NAME # _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32, has_d16,
+                                      !if(!eq(channels, 1), "AMDGPU", "")>,
+                   MIMG_Mask<asm#"_V1", channels>;
+  def NAME # _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64, has_d16>,
+                   MIMG_Mask<asm#"_V2", channels>;
+  def NAME # _V3 : MIMG_Store_Helper <op, asm, data_rc, VReg_96, has_d16>,
+                   MIMG_Mask<asm#"_V3", channels>;
+  def NAME # _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128, has_d16>,
+                   MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_Store <bits<7> op, string asm, bit has_d16> {
+  defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1, has_d16>;
+  defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2, has_d16>;
+  defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3, has_d16>;
+  defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4, has_d16>;
 }
 
 class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
@@ -177,23 +145,27 @@ class MIMG_Atomic_Helper <string asm, Re
 }
 
 class MIMG_Atomic_Real_si<mimg op, string name, string asm,
-  RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> :
-  MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
-  SIMCInstr<name, SIEncodingFamily.SI>,
-  MIMGe<op.SI> {
+                          RegisterClass data_rc, RegisterClass addr_rc,
+                          bit enableDasm>
+  : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "SICI", enableDasm>,
+    SIMCInstr<name, SIEncodingFamily.SI>,
+    MIMGe<op.SI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isSICI];
   let DisableDecoder = DisableSIDecoder;
+  let d16 = 0;
 }
 
 class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
-  RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm> :
-  MIMG_Atomic_Helper<asm, data_rc, addr_rc, "VI", enableDasm>,
-  SIMCInstr<name, SIEncodingFamily.VI>,
-  MIMGe<op.VI> {
+                          RegisterClass data_rc, RegisterClass addr_rc,
+                          bit enableDasm>
+  : MIMG_Atomic_Helper<asm, data_rc, addr_rc, "VI", enableDasm>,
+    SIMCInstr<name, SIEncodingFamily.VI>,
+    MIMGe<op.VI> {
   let isCodeGenOnly = 0;
   let AssemblerPredicates = [isVI];
   let DisableDecoder = DisableVIDecoder;
+  let d16 = 0;
 }
 
 multiclass MIMG_Atomic_Helper_m <mimg op,
@@ -245,59 +217,46 @@ multiclass MIMG_Atomic <mimg op, string
 class MIMG_Sampler_Helper <bits<7> op, string asm,
                            RegisterClass dst_rc,
                            RegisterClass src_rc,
-                           bit wqm,
-                           bit d16_bit=0,
-                           string dns=""> : MIMG_Helper <
-  (outs dst_rc:$vdata),
-  (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  dns>, MIMGe<op> {
+                           bit wqm, bit has_d16,
+                           string dns="">
+  : MIMG_Helper <(outs dst_rc:$vdata),
+                 !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+                           DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+                           R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
+                      !if(has_d16, (ins D16:$d16), (ins))),
+                 asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+                   #!if(has_d16, "$d16", ""),
+                 dns>,
+    MIMGe<op> {
   let WQM = wqm;
-  let D16 = d16;
-}
 
-multiclass MIMG_Sampler_Src_Helper_Helper <bits<7> op, string asm,
-                                    RegisterClass dst_rc,
-                                    int channels, bit wqm,
-                                    bit d16_bit, string suffix> {
-    def _V1 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit,
-                                   !if(!eq(channels, 1), "AMDGPU", "")>,
-                       MIMG_Mask<asm#"_V1"#suffix, channels>;
-    def _V2 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>,
-                       MIMG_Mask<asm#"_V2"#suffix, channels>;
-    def _V3 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>,
-                       MIMG_Mask<asm#"_V3"#suffix, channels>;
-    def _V4 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>,
-                       MIMG_Mask<asm#"_V4"#suffix, channels>;
-    def _V8 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>,
-                       MIMG_Mask<asm#"_V8"#suffix, channels>;
-    def _V16 # suffix : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>,
-                        MIMG_Mask<asm#"_V16"#suffix, channels>;
+  let HasD16 = has_d16;
+  let d16 = !if(HasD16, ?, 0);
 }
 
 multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
                                     RegisterClass dst_rc,
-                                    int channels, bit wqm> {
-  defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 0, "">;
-
-  let d16 = 1 in {
-    let SubtargetPredicate = HasPackedD16VMem in {
-      defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16">;
-    } // End HasPackedD16VMem.
-
-    let SubtargetPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm "" : MIMG_Sampler_Src_Helper_Helper <op, asm, dst_rc, channels, wqm, 1, "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
-}
-
-multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> {
-  defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, wqm>;
-  defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, wqm>;
-  defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, wqm>;
-  defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, wqm>;
+                                    int channels, bit wqm, bit has_d16> {
+    def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm, has_d16,
+                                   !if(!eq(channels, 1), "AMDGPU", "")>,
+              MIMG_Mask<asm#"_V1", channels>;
+    def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm, has_d16>,
+              MIMG_Mask<asm#"_V2", channels>;
+    def _V3 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_96, wqm, has_d16>,
+              MIMG_Mask<asm#"_V3", channels>;
+    def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm, has_d16>,
+              MIMG_Mask<asm#"_V4", channels>;
+    def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm, has_d16>,
+              MIMG_Mask<asm#"_V8", channels>;
+    def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm, has_d16>,
+               MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm = 0, bit has_d16 = 1> {
+  defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, wqm, has_d16>;
+  defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, wqm, has_d16>;
+  defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, wqm, has_d16>;
+  defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, wqm, has_d16>;
 }
 
 multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler<op, asm, 1>;
@@ -306,14 +265,14 @@ class MIMG_Gather_Helper <bits<7> op, st
                           RegisterClass dst_rc,
                           RegisterClass src_rc,
                           bit wqm,
-                          bit d16_bit=0,
-                          string dns=""> : MIMG <
-  (outs dst_rc:$vdata),
-  (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
-       DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
-       R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
-  asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"#!if(d16_bit, " d16", ""),
-  []>, MIMGe<op> {
+                          string dns="">
+  : MIMG <(outs dst_rc:$vdata),
+          (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+               DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
+               R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da, D16:$d16),
+          asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da$d16",
+          []>,
+    MIMGe<op> {
   let mayLoad = 1;
   let mayStore = 0;
 
@@ -327,7 +286,7 @@ class MIMG_Gather_Helper <bits<7> op, st
   let Gather4 = 1;
   let hasPostISelHook = 0;
   let WQM = wqm;
-  let D16 = d16;
+  let HasD16 = 1;
 
   let DecoderNamespace = dns;
   let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
@@ -336,29 +295,25 @@ class MIMG_Gather_Helper <bits<7> op, st
 
 multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
                                     RegisterClass dst_rc,
-                                    bit wqm, bit d16_bit,
-                                    string prefix,
-                                    string suffix> {
-  def prefix # _V1 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm, d16_bit, "AMDGPU">;
-  def prefix # _V2 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm, d16_bit>;
-  def prefix # _V3 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm, d16_bit>;
-  def prefix # _V4 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm, d16_bit>;
-  def prefix # _V8 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm, d16_bit>;
-  def prefix # _V16 # suffix : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm, d16_bit>;
+                                    int channels, bit wqm> {
+  def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm,
+                                !if(!eq(channels, 4), "AMDGPU", "")>,
+            MIMG_Gather_Size<asm#"_V1", channels>;
+  def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
+            MIMG_Gather_Size<asm#"_V2", channels>;
+  def _V3 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_96, wqm>,
+            MIMG_Gather_Size<asm#"_V3", channels>;
+  def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
+            MIMG_Gather_Size<asm#"_V4", channels>;
+  def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
+            MIMG_Gather_Size<asm#"_V8", channels>;
+  def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
+             MIMG_Gather_Size<asm#"_V16", channels>;
 }
 
 multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> {
-  defm "" : MIMG_Gather_Src_Helper<op, asm, VReg_128, wqm, 0, "_V4", "">;
-
-  let d16 = 1 in {
-    let AssemblerPredicate = HasPackedD16VMem in {
-      defm "" : MIMG_Gather_Src_Helper<op, asm, VReg_64, wqm, 1, "_V2", "_D16">;
-    } // End HasPackedD16VMem.
-
-    let AssemblerPredicate = HasUnpackedD16VMem, DecoderNamespace = "GFX80_UNPACKED" in {
-      defm "" : MIMG_Gather_Src_Helper<op, asm, VReg_128, wqm, 1, "_V4", "_D16_gfx80">;
-    } // End HasUnpackedD16VMem.
-  } // End d16 = 1.
+  defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, wqm>; /* for packed D16 only */
+  defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, wqm>;
 }
 
 multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather<op, asm, 1>;
@@ -367,19 +322,19 @@ multiclass MIMG_Gather_WQM <bits<7> op,
 // MIMG Instructions
 //===----------------------------------------------------------------------===//
 let SubtargetPredicate = isGCN in {
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
-defm IMAGE_LOAD_PCK : MIMG_PckNoSampler <0x00000002, "image_load_pck">;
-defm IMAGE_LOAD_PCK_SGN : MIMG_PckNoSampler <0x00000003, "image_load_pck_sgn">;
-defm IMAGE_LOAD_MIP_PCK : MIMG_PckNoSampler <0x00000004, "image_load_mip_pck">;
-defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_PckNoSampler <0x00000005, "image_load_mip_pck_sgn">;
-defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
-defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
-defm IMAGE_STORE_PCK : MIMG_PckStore <0x0000000a, "image_store_pck">;
-defm IMAGE_STORE_MIP_PCK : MIMG_PckStore <0x0000000b, "image_store_mip_pck">;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load", 1>;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip", 1>;
+defm IMAGE_LOAD_PCK : MIMG_NoSampler <0x00000002, "image_load_pck", 0>;
+defm IMAGE_LOAD_PCK_SGN : MIMG_NoSampler <0x00000003, "image_load_pck_sgn", 0>;
+defm IMAGE_LOAD_MIP_PCK : MIMG_NoSampler <0x00000004, "image_load_mip_pck", 0>;
+defm IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoSampler <0x00000005, "image_load_mip_pck_sgn", 0>;
+defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store", 1>;
+defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip", 1>;
+defm IMAGE_STORE_PCK : MIMG_Store <0x0000000a, "image_store_pck", 0>;
+defm IMAGE_STORE_MIP_PCK : MIMG_Store <0x0000000b, "image_store_mip_pck", 0>;
 
 let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo", 0>;
 }
 
 defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
@@ -457,7 +412,7 @@ defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gathe
 defm IMAGE_GATHER4_C_LZ_O   : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
 
 let mayLoad = 0, mayStore = 0 in {
-defm IMAGE_GET_LOD          : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
+defm IMAGE_GET_LOD          : MIMG_Sampler <0x00000060, "image_get_lod", 1, 0>;
 }
 
 defm IMAGE_SAMPLE_CD        : MIMG_Sampler <0x00000068, "image_sample_cd">;
@@ -519,13 +474,13 @@ class makeRegSequence<ValueType vt, Regi
 }
 
 class ImageDimPattern<AMDGPUImageDimIntrinsic I,
-                      string dop, ValueType dty,
+                      string dop, ValueType dty, bit d16,
                       string suffix = ""> : GCNPat<(undef), (undef)> {
   list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
   getDwordsType AddrDwords = getDwordsType<!size(AddrArgs)>;
 
-  Instruction MI =
-    !cast<Instruction>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
+  MIMG MI =
+    !cast<MIMG>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
 
   // DAG fragment to match data arguments (vdata for store/atomic, dmask
   // for non-atomic).
@@ -581,7 +536,8 @@ class ImageDimPattern<AMDGPUImageDimIntr
              0, /* r128 */
              0, /* tfe */
              0 /*(as_i1imm $lwe)*/,
-             { I.P.Dim.DA }));
+             { I.P.Dim.DA }),
+         !if(MI.HasD16, (MI d16), (MI)));
   let ResultInstrs = [
     !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
   ];
@@ -589,23 +545,23 @@ class ImageDimPattern<AMDGPUImageDimIntr
 
 foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
                            AMDGPUImageDimGetResInfoIntrinsics) in {
-  def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32>;
-  def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32>;
-  def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32>;
+  def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32, 0>;
+  def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32, 0>;
+  def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32, 0>;
 }
 
 multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
                              AMDGPUImageDimIntrinsic d16helper> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, "_D16_gfx80">;
-    def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, "_D16_gfx80">;
-    def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, "_D16_gfx80">;
+    def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, 1>;
+    def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, 1>;
+    def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    def _packed_v1 : ImageDimPattern<I, "_V1", f16, "_D16">;
-    def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, "_D16">;
-    def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, "_D16">;
+    def _packed_v1 : ImageDimPattern<I, "_V1", f16, 1>;
+    def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, 1>;
+    def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
@@ -627,7 +583,7 @@ foreach intr = AMDGPUImageDimIntrinsics
 }
 
 foreach intr = AMDGPUImageDimGatherIntrinsics in {
-  def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32>;
+  def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32, 0>;
 
   def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
     let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
@@ -643,16 +599,16 @@ foreach intr = AMDGPUImageDimGatherIntri
     def intr#_unpacked_v4 :
         ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
                             "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
-                        "_V4", v4i32, "_D16_gfx80">;
+                        "_V4", v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, "_D16">;
+    def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 foreach intr = AMDGPUImageDimAtomicIntrinsics in {
-  def intr#_pat1 : ImageDimPattern<intr, "_V1", i32>;
+  def intr#_pat1 : ImageDimPattern<intr, "_V1", i32, 0>;
 }
 
 /********** ======================= **********/
@@ -663,154 +619,160 @@ foreach intr = AMDGPUImageDimAtomicIntri
 // TODO:
 // 1. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128).
 // 2. Add A16 support when we pass address of half type.
-multiclass ImageSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt>  {
+multiclass ImageSamplePattern<SDPatternOperator name, MIMG opcode,
+                              ValueType dt, ValueType vt, bit d16>  {
   def : GCNPat<
     (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
         i1:$slc, i1:$lwe, i1:$da)),
-    (opcode $addr, $rsrc, $sampler,
-          (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
-          0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $addr, $rsrc, $sampler, (as_i32imm $dmask), (as_i1imm $unorm),
+                (as_i1imm $glc), (as_i1imm $slc), 0, 0, (as_i1imm $lwe),
+                (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
     >;
 }
 
-multiclass ImageSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
-  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, f32>;
-  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2f32>;
-  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4f32>;
-  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V8 # suffix), dt, v8f32>;
-  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V16 # suffix), dt, v16f32>;
+multiclass ImageSampleDataPatterns<SDPatternOperator name, string opcode,
+                                   ValueType dt, bit d16> {
+  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V1), dt, f32, d16>;
+  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V2), dt, v2f32, d16>;
+  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V4), dt, v4f32, d16>;
+  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V8), dt, v8f32, d16>;
+  defm : ImageSamplePattern<name, !cast<MIMG>(opcode # _V16), dt, v16f32, d16>;
 }
 
 // ImageSample patterns.
 multiclass ImageSamplePatterns<SDPatternOperator name, string opcode> {
-  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 // ImageSample alternative patterns for illegal vector half Types.
 multiclass ImageSampleAltPatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
 }
 
 // ImageGather4 patterns.
 multiclass ImageGather4Patterns<SDPatternOperator name, string opcode> {
-  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 // ImageGather4 alternative patterns for illegal vector half Types.
 multiclass ImageGather4AltPatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
-
 }
 
 // ImageLoad for amdgcn.
-multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode,
+                            ValueType dt, ValueType vt, bit d16> {
   def : GCNPat <
     (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
                 i1:$da)),
-    (opcode $addr, $rsrc,
-          (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-          0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+                 (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
   >;
 }
 
-multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
-  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
-  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
-  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode,
+                                 ValueType dt, bit d16> {
+  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+  defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
 }
 
 // ImageLoad patterns.
 // TODO: support v3f32.
 multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
-  defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 // ImageLoad alternative patterns for illegal vector half Types.
 multiclass ImageLoadAltPatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32,  "_D16_gfx80">;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnPackedD16VMem.
 }
 
 // ImageStore for amdgcn.
-multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode,
+                             ValueType dt, ValueType vt, bit d16> {
   def : GCNPat <
     (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
           i1:$lwe, i1:$da),
-    (opcode $data, $addr, $rsrc,
-          (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
-          0, 0, (as_i1imm $lwe), (as_i1imm $da))
+    !con((opcode $data, $addr, $rsrc, (as_i32imm $dmask), 1, (as_i1imm $glc),
+                 (as_i1imm $slc), 0, 0, (as_i1imm $lwe), (as_i1imm $da)),
+         !if(opcode.HasD16, (opcode d16), (opcode)))
   >;
 }
 
-multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt, string suffix = ""> {
-  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1 # suffix), dt, i32>;
-  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2 # suffix), dt, v2i32>;
-  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4 # suffix), dt, v4i32>;
+multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode,
+                                  ValueType dt, bit d16> {
+  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1), dt, i32, d16>;
+  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32, d16>;
+  defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32, d16>;
 }
 
 // ImageStore patterns.
 // TODO: support v3f32.
 multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
-  defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32>;
-  defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
-  defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+  defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32, 0>;
+  defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32, 0>;
+  defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32, 0>;
 
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16_gfx80">;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, 1>;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, 1>;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v4f16, 1>;
   } // End HasPackedD16VMem.
 }
 
 // ImageStore alternative patterns.
 multiclass ImageStoreAltPatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4i32, 1>;
   } // End HasUnpackedD16VMem.
 
   let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), i32, "_D16">;
-    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), i32, 1>;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2i32, 1>;
   } // End HasPackedD16VMem.
 }
 
@@ -1030,83 +992,3 @@ defm : ImageAtomicPatterns<int_amdgcn_im
 defm : ImageAtomicPatterns<int_amdgcn_image_atomic_xor, "IMAGE_ATOMIC_XOR">;
 defm : ImageAtomicPatterns<int_amdgcn_image_atomic_inc, "IMAGE_ATOMIC_INC">;
 defm : ImageAtomicPatterns<int_amdgcn_image_atomic_dec, "IMAGE_ATOMIC_DEC">;
-
-/* SIsample for simple 1D texture lookup */
-def : GCNPat <
-  (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
-  (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-    (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
-    (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-    (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
-    (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : GCNPat <
-    (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
-    (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-class SampleShadowPattern<SDNode name, MIMG opcode,
-                          ValueType vt> : GCNPat <
-    (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
-    (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SampleShadowArrayPattern<SDNode name, MIMG opcode,
-                               ValueType vt> : GCNPat <
-    (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
-    (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
->;
-
-/* SIsample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
-                          MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
-MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
-  def : SamplePattern <SIsample, sample, addr_type>;
-  def : SampleRectPattern <SIsample, sample, addr_type>;
-  def : SampleArrayPattern <SIsample, sample, addr_type>;
-  def : SampleShadowPattern <SIsample, sample_c, addr_type>;
-  def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
-
-  def : SamplePattern <SIsamplel, sample_l, addr_type>;
-  def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
-  def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
-  def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
-
-  def : SamplePattern <SIsampleb, sample_b, addr_type>;
-  def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
-  def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
-  def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
-
-  def : SamplePattern <SIsampled, sample_d, addr_type>;
-  def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
-  def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
-  def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
-}
-
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
-                      IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
-                      IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
-                      IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
-                      v2i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
-                      IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
-                      IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
-                      IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
-                      v4i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
-                      IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
-                      IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
-                      IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
-                      v8i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
-                      IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
-                      IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
-                      IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
-                      v16i32>;

Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Thu Jun 21 06:36:01 2018
@@ -87,8 +87,8 @@ enum : uint64_t {
   // Is a packed VOP3P instruction.
   IsPacked = UINT64_C(1) << 49,
 
-  // "d16" bit set or not.
-  D16 = UINT64_C(1) << 50
+  // Is a D16 buffer instruction.
+  D16Buf = UINT64_C(1) << 50
 };
 
 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Jun 21 06:36:01 2018
@@ -7762,9 +7762,16 @@ static unsigned SubIdx2Lane(unsigned Idx
 /// Adjust the writemask of MIMG instructions
 SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
                                           SelectionDAG &DAG) const {
+  unsigned Opcode = Node->getMachineOpcode();
+
+  // Subtract 1 because the vdata output is not a MachineSDNode operand.
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
+  if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
+    return Node; // not implemented for D16
+
   SDNode *Users[4] = { nullptr };
   unsigned Lane = 0;
-  unsigned DmaskIdx = (Node->getNumOperands() - Node->getNumValues() == 9) ? 2 : 3;
+  unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
   unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
   unsigned NewDmask = 0;
   bool HasChain = Node->getNumValues() > 1;
@@ -7936,7 +7943,7 @@ SDNode *SITargetLowering::PostISelFoldin
   unsigned Opcode = Node->getMachineOpcode();
 
   if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
-      !TII->isGather4(Opcode) && !TII->isD16(Opcode)) {
+      !TII->isGather4(Opcode)) {
     return adjustWritemask(Node, DAG);
   }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Thu Jun 21 06:36:01 2018
@@ -118,8 +118,8 @@ class InstSI <dag outs, dag ins, string
   // This bit indicates that this is a packed VOP3P instruction
   field bit IsPacked = 0;
 
-  // This bit indicates that this is a D16 instruction.
-  field bit D16 = 0;
+  // This bit indicates that this is a D16 buffer instruction.
+  field bit D16Buf = 0;
 
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
@@ -176,7 +176,7 @@ class InstSI <dag outs, dag ins, string
 
   let TSFlags{49} = IsPacked;
 
-  let TSFlags{50} = D16;
+  let TSFlags{50} = D16Buf;
 
   let SchedRW = [Write32Bit];
 
@@ -255,7 +255,7 @@ class MIMGe <bits<7> op> : Enc64 {
   bits<1> tfe;
   bits<1> lwe;
   bits<1> slc;
-  bits<1> d16 = 0;
+  bit d16;
   bits<8> vaddr;
   bits<7> srsrc;
   bits<7> ssamp;
@@ -344,4 +344,6 @@ class MIMG <dag outs, dag ins, string as
 
   let UseNamedOperandTable = 1;
   let hasSideEffects = 0; // XXX ????
+
+  bit HasD16 = 0;
 }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Thu Jun 21 06:36:01 2018
@@ -445,14 +445,6 @@ public:
     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
   }
 
-  static bool isD16(const MachineInstr &MI) {
-    return MI.getDesc().TSFlags & SIInstrFlags::D16;
-  }
-
-  bool isD16(uint16_t Opcode) const {
-    return get(Opcode).TSFlags & SIInstrFlags::D16;
-  }
-
   static bool isFLAT(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
   }

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Thu Jun 21 06:36:01 2018
@@ -300,16 +300,6 @@ def SIImage_gather4_c_b_o     : SDTImage
 def SIImage_gather4_c_b_cl_o  : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_B_CL_O">;
 def SIImage_gather4_c_lz_o    : SDTImage_sample<"AMDGPUISD::IMAGE_GATHER4_C_LZ_O">;
 
-class SDSample<string opcode> : SDNode <opcode,
-  SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
-                       SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
->;
-
-def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
-def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
-def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
-def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
-
 def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
   SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
 >;
@@ -2079,6 +2069,14 @@ def getMIMGAtomicOp2 : InstrMapping {
   let ValueCols = [["1"]];
 }
 
+def getMIMGGatherOpPackedD16 : InstrMapping {
+  let FilterClass = "MIMG_Gather_Size";
+  let RowFields = ["Op"];
+  let ColFields = ["Channels"];
+  let KeyCol = ["4"];
+  let ValueCols = [["2"]];
+}
+
 // Maps an commuted opcode to its original version
 def getCommuteOrig : InstrMapping {
   let FilterClass = "Commutable_REV";

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Thu Jun 21 06:36:01 2018
@@ -166,6 +166,9 @@ int getMaskedMIMGAtomicOp(const MCInstrI
                           unsigned Opc, unsigned NewChannels);
 
 LLVM_READONLY
+int getMIMGGatherOpPackedD16(uint16_t Opcode);
+
+LLVM_READONLY
 int getMCOpcode(uint16_t Opcode, unsigned Gen);
 
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,

Modified: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir Thu Jun 21 06:36:01 2018
@@ -61,7 +61,7 @@ body:             |
     %11.sub6 = COPY %1
     %11.sub7 = COPY %1
     %11.sub8 = COPY %1
-    dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+    dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
     %20.sub1 = COPY %2
     %20.sub2 = COPY %2
     %20.sub3 = COPY %2
@@ -70,6 +70,6 @@ body:             |
     %20.sub6 = COPY %2
     %20.sub7 = COPY %2
     %20.sub8 = COPY %2
-    dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec
+    dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir Thu Jun 21 06:36:01 2018
@@ -305,11 +305,11 @@ body:             |
 
 # GCN-LABEL: {{^}}name: image_clause{{$}}
 # GCN:      early-clobber %4:vreg_128, early-clobber %3:vreg_128, early-clobber %5:vreg_128 = BUNDLE %0, undef %2:sreg_128, %1, implicit $exec {
-# GCN-NEXT:   %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-# GCN-NEXT:   %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
-# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 
 ---
 name:            image_clause
@@ -325,17 +325,17 @@ body:             |
   bb.0:
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
-    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
-    IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sreg_128, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 ...
 
 # GCN-LABEL: {{^}}name: mixed_clause{{$}}
 # GCN:      dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
-# GCN-NEXT:   dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+# GCN-NEXT:   dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT:   dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
 # GCN-NEXT:   dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
 # GCN-NEXT: }
@@ -355,7 +355,7 @@ body:             |
     %0 = IMPLICIT_DEF
     %1 = IMPLICIT_DEF
     %2 = IMPLICIT_DEF
-    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+    %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
     %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec
     %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec
 ...

Modified: llvm/trunk/test/MC/AMDGPU/mimg.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/mimg.s?rev=335222&r1=335221&r2=335222&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/mimg.s (original)
+++ llvm/trunk/test/MC/AMDGPU/mimg.s Thu Jun 21 06:36:01 2018
@@ -356,20 +356,19 @@ image_gather4 v[5:8], v[1:4], s[8:15], s
 // GCN: image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x8 ; encoding: [0x00,0x08,0x00,0xf1,0x01,0x05,0x62,0x00]
 
 image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16
-// NOSICI:   error: instruction not supported on this GPU
+// NOSICI:   error: d16 modifier is not supported on this GPU
 // GFX8_0:   image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// NOGFX8_1: error: instruction not supported on this GPU
-// NOGFX9:   error: instruction not supported on this GPU
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9:   error: image data size does not match dmask and tfe
 
 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16
 // NOSICI:   error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
+// NOGFX8_0: error: image data size does not match dmask and tfe
 // GFX8_1:   image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
 // GFX9:     image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
 
-// FIXME: d16 is handled as an optional modifier, should it be corrected?
 image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1
-// NOSICI:   error: d16 modifier is not supported on this GPU
-// NOGFX8_0: error: instruction not supported on this GPU
-// GFX8_1:   image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
-// GFX9:     image_gather4 v[5:6], v1, s[8:15], s[12:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0x05,0x62,0x80]
+// NOSICI:   error: image data size does not match dmask and tfe
+// NOGFX8_0: error: image data size does not match dmask and tfe
+// NOGFX8_1: error: image data size does not match dmask and tfe
+// NOGFX9:   error: image data size does not match dmask and tfe




More information about the llvm-commits mailing list