[llvm] 00aa0ae - [NVPTX] Add imm variants for surface and texture instructions

Andrew Savonichev via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 10 08:05:53 PST 2021


Author: Andrew Savonichev
Date: 2021-11-10T19:05:03+03:00
New Revision: 00aa0aeb067bbfda274aafdcabe9f058959db66b

URL: https://github.com/llvm/llvm-project/commit/00aa0aeb067bbfda274aafdcabe9f058959db66b
DIFF: https://github.com/llvm/llvm-project/commit/00aa0aeb067bbfda274aafdcabe9f058959db66b.diff

LOG: [NVPTX] Add imm variants for surface and texture instructions

Texture/sampler/surface operands can be either a register or an
immediate (an index of .texref, .samplerref or .surfref).

TableGen declarations for these instructions used to only have
Int64Regs operands, so this caused issues when the machine verifier
is turned on:

    *** Bad machine code: Expected a register operand. ***
    - function:    bar
    - basic block: %bb.0  (0x55b144d99ab8)
    - instruction: %4:int32regs = SULD_1D_I32_TRAP 0, killed %2:int32regs
    - operand 1:   0

The solution is to duplicate these instructions for all possible
operand types (i64imm and Int64Regs). Since this would
essentially double the amount of code in TableGen, the patch also
does some refactoring for the original instructions to keep
things manageable.

Differential Revision: https://reviews.llvm.org/D112232

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
    llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
    llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
    llvm/test/CodeGen/NVPTX/surf-read.ll
    llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
    llvm/test/CodeGen/NVPTX/surf-write.ll
    llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
    llvm/test/CodeGen/NVPTX/tex-read.ll
    llvm/test/CodeGen/NVPTX/texsurf-queries.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 00913e93cfd35..dd4290a605a9c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2348,508 +2348,508 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
   switch (N->getOpcode()) {
   default: return false;
   case NVPTXISD::Tex1DFloatS32:
-    Opc = NVPTX::TEX_1D_F32_S32;
+    Opc = NVPTX::TEX_1D_F32_S32_RR;
     break;
   case NVPTXISD::Tex1DFloatFloat:
-    Opc = NVPTX::TEX_1D_F32_F32;
+    Opc = NVPTX::TEX_1D_F32_F32_RR;
     break;
   case NVPTXISD::Tex1DFloatFloatLevel:
-    Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex1DFloatFloatGrad:
-    Opc = NVPTX::TEX_1D_F32_F32_GRAD;
+    Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex1DS32S32:
-    Opc = NVPTX::TEX_1D_S32_S32;
+    Opc = NVPTX::TEX_1D_S32_S32_RR;
     break;
   case NVPTXISD::Tex1DS32Float:
-    Opc = NVPTX::TEX_1D_S32_F32;
+    Opc = NVPTX::TEX_1D_S32_F32_RR;
     break;
   case NVPTXISD::Tex1DS32FloatLevel:
-    Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex1DS32FloatGrad:
-    Opc = NVPTX::TEX_1D_S32_F32_GRAD;
+    Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex1DU32S32:
-    Opc = NVPTX::TEX_1D_U32_S32;
+    Opc = NVPTX::TEX_1D_U32_S32_RR;
     break;
   case NVPTXISD::Tex1DU32Float:
-    Opc = NVPTX::TEX_1D_U32_F32;
+    Opc = NVPTX::TEX_1D_U32_F32_RR;
     break;
   case NVPTXISD::Tex1DU32FloatLevel:
-    Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex1DU32FloatGrad:
-    Opc = NVPTX::TEX_1D_U32_F32_GRAD;
+    Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex1DArrayFloatS32:
-    Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
+    Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
     break;
   case NVPTXISD::Tex1DArrayFloatFloat:
-    Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
+    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
     break;
   case NVPTXISD::Tex1DArrayFloatFloatLevel:
-    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex1DArrayFloatFloatGrad:
-    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
+    Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex1DArrayS32S32:
-    Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
+    Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
     break;
   case NVPTXISD::Tex1DArrayS32Float:
-    Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
+    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
     break;
   case NVPTXISD::Tex1DArrayS32FloatLevel:
-    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex1DArrayS32FloatGrad:
-    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
+    Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex1DArrayU32S32:
-    Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
+    Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
     break;
   case NVPTXISD::Tex1DArrayU32Float:
-    Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
+    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
     break;
   case NVPTXISD::Tex1DArrayU32FloatLevel:
-    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex1DArrayU32FloatGrad:
-    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
+    Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex2DFloatS32:
-    Opc = NVPTX::TEX_2D_F32_S32;
+    Opc = NVPTX::TEX_2D_F32_S32_RR;
     break;
   case NVPTXISD::Tex2DFloatFloat:
-    Opc = NVPTX::TEX_2D_F32_F32;
+    Opc = NVPTX::TEX_2D_F32_F32_RR;
     break;
   case NVPTXISD::Tex2DFloatFloatLevel:
-    Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex2DFloatFloatGrad:
-    Opc = NVPTX::TEX_2D_F32_F32_GRAD;
+    Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex2DS32S32:
-    Opc = NVPTX::TEX_2D_S32_S32;
+    Opc = NVPTX::TEX_2D_S32_S32_RR;
     break;
   case NVPTXISD::Tex2DS32Float:
-    Opc = NVPTX::TEX_2D_S32_F32;
+    Opc = NVPTX::TEX_2D_S32_F32_RR;
     break;
   case NVPTXISD::Tex2DS32FloatLevel:
-    Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex2DS32FloatGrad:
-    Opc = NVPTX::TEX_2D_S32_F32_GRAD;
+    Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex2DU32S32:
-    Opc = NVPTX::TEX_2D_U32_S32;
+    Opc = NVPTX::TEX_2D_U32_S32_RR;
     break;
   case NVPTXISD::Tex2DU32Float:
-    Opc = NVPTX::TEX_2D_U32_F32;
+    Opc = NVPTX::TEX_2D_U32_F32_RR;
     break;
   case NVPTXISD::Tex2DU32FloatLevel:
-    Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex2DU32FloatGrad:
-    Opc = NVPTX::TEX_2D_U32_F32_GRAD;
+    Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex2DArrayFloatS32:
-    Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
+    Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
     break;
   case NVPTXISD::Tex2DArrayFloatFloat:
-    Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
+    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
     break;
   case NVPTXISD::Tex2DArrayFloatFloatLevel:
-    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex2DArrayFloatFloatGrad:
-    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
+    Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex2DArrayS32S32:
-    Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
+    Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
     break;
   case NVPTXISD::Tex2DArrayS32Float:
-    Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
+    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
     break;
   case NVPTXISD::Tex2DArrayS32FloatLevel:
-    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex2DArrayS32FloatGrad:
-    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
+    Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex2DArrayU32S32:
-    Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
+    Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
     break;
   case NVPTXISD::Tex2DArrayU32Float:
-    Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
+    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
     break;
   case NVPTXISD::Tex2DArrayU32FloatLevel:
-    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex2DArrayU32FloatGrad:
-    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
+    Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex3DFloatS32:
-    Opc = NVPTX::TEX_3D_F32_S32;
+    Opc = NVPTX::TEX_3D_F32_S32_RR;
     break;
   case NVPTXISD::Tex3DFloatFloat:
-    Opc = NVPTX::TEX_3D_F32_F32;
+    Opc = NVPTX::TEX_3D_F32_F32_RR;
     break;
   case NVPTXISD::Tex3DFloatFloatLevel:
-    Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex3DFloatFloatGrad:
-    Opc = NVPTX::TEX_3D_F32_F32_GRAD;
+    Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex3DS32S32:
-    Opc = NVPTX::TEX_3D_S32_S32;
+    Opc = NVPTX::TEX_3D_S32_S32_RR;
     break;
   case NVPTXISD::Tex3DS32Float:
-    Opc = NVPTX::TEX_3D_S32_F32;
+    Opc = NVPTX::TEX_3D_S32_F32_RR;
     break;
   case NVPTXISD::Tex3DS32FloatLevel:
-    Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex3DS32FloatGrad:
-    Opc = NVPTX::TEX_3D_S32_F32_GRAD;
+    Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
     break;
   case NVPTXISD::Tex3DU32S32:
-    Opc = NVPTX::TEX_3D_U32_S32;
+    Opc = NVPTX::TEX_3D_U32_S32_RR;
     break;
   case NVPTXISD::Tex3DU32Float:
-    Opc = NVPTX::TEX_3D_U32_F32;
+    Opc = NVPTX::TEX_3D_U32_F32_RR;
     break;
   case NVPTXISD::Tex3DU32FloatLevel:
-    Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tex3DU32FloatGrad:
-    Opc = NVPTX::TEX_3D_U32_F32_GRAD;
+    Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
     break;
   case NVPTXISD::TexCubeFloatFloat:
-    Opc = NVPTX::TEX_CUBE_F32_F32;
+    Opc = NVPTX::TEX_CUBE_F32_F32_RR;
     break;
   case NVPTXISD::TexCubeFloatFloatLevel:
-    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::TexCubeS32Float:
-    Opc = NVPTX::TEX_CUBE_S32_F32;
+    Opc = NVPTX::TEX_CUBE_S32_F32_RR;
     break;
   case NVPTXISD::TexCubeS32FloatLevel:
-    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::TexCubeU32Float:
-    Opc = NVPTX::TEX_CUBE_U32_F32;
+    Opc = NVPTX::TEX_CUBE_U32_F32_RR;
     break;
   case NVPTXISD::TexCubeU32FloatLevel:
-    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::TexCubeArrayFloatFloat:
-    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
+    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
     break;
   case NVPTXISD::TexCubeArrayFloatFloatLevel:
-    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
     break;
   case NVPTXISD::TexCubeArrayS32Float:
-    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
+    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
     break;
   case NVPTXISD::TexCubeArrayS32FloatLevel:
-    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
     break;
   case NVPTXISD::TexCubeArrayU32Float:
-    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
+    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
     break;
   case NVPTXISD::TexCubeArrayU32FloatLevel:
-    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
     break;
   case NVPTXISD::Tld4R2DFloatFloat:
-    Opc = NVPTX::TLD4_R_2D_F32_F32;
+    Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
     break;
   case NVPTXISD::Tld4G2DFloatFloat:
-    Opc = NVPTX::TLD4_G_2D_F32_F32;
+    Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
     break;
   case NVPTXISD::Tld4B2DFloatFloat:
-    Opc = NVPTX::TLD4_B_2D_F32_F32;
+    Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
     break;
   case NVPTXISD::Tld4A2DFloatFloat:
-    Opc = NVPTX::TLD4_A_2D_F32_F32;
+    Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
     break;
   case NVPTXISD::Tld4R2DS64Float:
-    Opc = NVPTX::TLD4_R_2D_S32_F32;
+    Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
     break;
   case NVPTXISD::Tld4G2DS64Float:
-    Opc = NVPTX::TLD4_G_2D_S32_F32;
+    Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
     break;
   case NVPTXISD::Tld4B2DS64Float:
-    Opc = NVPTX::TLD4_B_2D_S32_F32;
+    Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
     break;
   case NVPTXISD::Tld4A2DS64Float:
-    Opc = NVPTX::TLD4_A_2D_S32_F32;
+    Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
     break;
   case NVPTXISD::Tld4R2DU64Float:
-    Opc = NVPTX::TLD4_R_2D_U32_F32;
+    Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
     break;
   case NVPTXISD::Tld4G2DU64Float:
-    Opc = NVPTX::TLD4_G_2D_U32_F32;
+    Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
     break;
   case NVPTXISD::Tld4B2DU64Float:
-    Opc = NVPTX::TLD4_B_2D_U32_F32;
+    Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
     break;
   case NVPTXISD::Tld4A2DU64Float:
-    Opc = NVPTX::TLD4_A_2D_U32_F32;
+    Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
     break;
   case NVPTXISD::TexUnified1DFloatS32:
-    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
+    Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
     break;
   case NVPTXISD::TexUnified1DFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
     break;
   case NVPTXISD::TexUnified1DFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified1DFloatFloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified1DS32S32:
-    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
+    Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
     break;
   case NVPTXISD::TexUnified1DS32Float:
-    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
     break;
   case NVPTXISD::TexUnified1DS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified1DS32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified1DU32S32:
-    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
+    Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
     break;
   case NVPTXISD::TexUnified1DU32Float:
-    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
     break;
   case NVPTXISD::TexUnified1DU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified1DU32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified1DArrayFloatS32:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
     break;
   case NVPTXISD::TexUnified1DArrayFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
     break;
   case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified1DArrayS32S32:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
     break;
   case NVPTXISD::TexUnified1DArrayS32Float:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
     break;
   case NVPTXISD::TexUnified1DArrayS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified1DArrayS32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified1DArrayU32S32:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
     break;
   case NVPTXISD::TexUnified1DArrayU32Float:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
     break;
   case NVPTXISD::TexUnified1DArrayU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified1DArrayU32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified2DFloatS32:
-    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
+    Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
     break;
   case NVPTXISD::TexUnified2DFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
     break;
   case NVPTXISD::TexUnified2DFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified2DFloatFloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified2DS32S32:
-    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
+    Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
     break;
   case NVPTXISD::TexUnified2DS32Float:
-    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
     break;
   case NVPTXISD::TexUnified2DS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified2DS32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified2DU32S32:
-    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
+    Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
     break;
   case NVPTXISD::TexUnified2DU32Float:
-    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
     break;
   case NVPTXISD::TexUnified2DU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified2DU32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified2DArrayFloatS32:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
     break;
   case NVPTXISD::TexUnified2DArrayFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
     break;
   case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified2DArrayS32S32:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
     break;
   case NVPTXISD::TexUnified2DArrayS32Float:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
     break;
   case NVPTXISD::TexUnified2DArrayS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified2DArrayS32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified2DArrayU32S32:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
     break;
   case NVPTXISD::TexUnified2DArrayU32Float:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
     break;
   case NVPTXISD::TexUnified2DArrayU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified2DArrayU32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified3DFloatS32:
-    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
+    Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
     break;
   case NVPTXISD::TexUnified3DFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
     break;
   case NVPTXISD::TexUnified3DFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified3DFloatFloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified3DS32S32:
-    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
+    Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
     break;
   case NVPTXISD::TexUnified3DS32Float:
-    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
     break;
   case NVPTXISD::TexUnified3DS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified3DS32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnified3DU32S32:
-    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
+    Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
     break;
   case NVPTXISD::TexUnified3DU32Float:
-    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
     break;
   case NVPTXISD::TexUnified3DU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnified3DU32FloatGrad:
-    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
+    Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
     break;
   case NVPTXISD::TexUnifiedCubeFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
     break;
   case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnifiedCubeS32Float:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
     break;
   case NVPTXISD::TexUnifiedCubeS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnifiedCubeU32Float:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
     break;
   case NVPTXISD::TexUnifiedCubeU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
     break;
   case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnifiedCubeArrayS32Float:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
     break;
   case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
     break;
   case NVPTXISD::TexUnifiedCubeArrayU32Float:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
     break;
   case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
-    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
+    Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
     break;
   case NVPTXISD::Tld4UnifiedR2DFloatFloat:
-    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedG2DFloatFloat:
-    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedB2DFloatFloat:
-    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedA2DFloatFloat:
-    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedR2DS64Float:
-    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedG2DS64Float:
-    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedB2DS64Float:
-    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedA2DS64Float:
-    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedR2DU64Float:
-    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedG2DU64Float:
-    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedB2DU64Float:
-    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
     break;
   case NVPTXISD::Tld4UnifiedA2DU64Float:
-    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
+    Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
     break;
   }
 
@@ -2866,499 +2866,499 @@ bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
   switch (N->getOpcode()) {
   default: return false;
   case NVPTXISD::Suld1DI8Clamp:
-    Opc = NVPTX::SULD_1D_I8_CLAMP;
+    Opc = NVPTX::SULD_1D_I8_CLAMP_R;
     break;
   case NVPTXISD::Suld1DI16Clamp:
-    Opc = NVPTX::SULD_1D_I16_CLAMP;
+    Opc = NVPTX::SULD_1D_I16_CLAMP_R;
     break;
   case NVPTXISD::Suld1DI32Clamp:
-    Opc = NVPTX::SULD_1D_I32_CLAMP;
+    Opc = NVPTX::SULD_1D_I32_CLAMP_R;
     break;
   case NVPTXISD::Suld1DI64Clamp:
-    Opc = NVPTX::SULD_1D_I64_CLAMP;
+    Opc = NVPTX::SULD_1D_I64_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV2I8Clamp:
-    Opc = NVPTX::SULD_1D_V2I8_CLAMP;
+    Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV2I16Clamp:
-    Opc = NVPTX::SULD_1D_V2I16_CLAMP;
+    Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV2I32Clamp:
-    Opc = NVPTX::SULD_1D_V2I32_CLAMP;
+    Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV2I64Clamp:
-    Opc = NVPTX::SULD_1D_V2I64_CLAMP;
+    Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV4I8Clamp:
-    Opc = NVPTX::SULD_1D_V4I8_CLAMP;
+    Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV4I16Clamp:
-    Opc = NVPTX::SULD_1D_V4I16_CLAMP;
+    Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
     break;
   case NVPTXISD::Suld1DV4I32Clamp:
-    Opc = NVPTX::SULD_1D_V4I32_CLAMP;
+    Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayI8Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayI16Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayI32Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayI64Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I8Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I16Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I32Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I64Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV4I8Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV4I16Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
     break;
   case NVPTXISD::Suld1DArrayV4I32Clamp:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
     break;
   case NVPTXISD::Suld2DI8Clamp:
-    Opc = NVPTX::SULD_2D_I8_CLAMP;
+    Opc = NVPTX::SULD_2D_I8_CLAMP_R;
     break;
   case NVPTXISD::Suld2DI16Clamp:
-    Opc = NVPTX::SULD_2D_I16_CLAMP;
+    Opc = NVPTX::SULD_2D_I16_CLAMP_R;
     break;
   case NVPTXISD::Suld2DI32Clamp:
-    Opc = NVPTX::SULD_2D_I32_CLAMP;
+    Opc = NVPTX::SULD_2D_I32_CLAMP_R;
     break;
   case NVPTXISD::Suld2DI64Clamp:
-    Opc = NVPTX::SULD_2D_I64_CLAMP;
+    Opc = NVPTX::SULD_2D_I64_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV2I8Clamp:
-    Opc = NVPTX::SULD_2D_V2I8_CLAMP;
+    Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV2I16Clamp:
-    Opc = NVPTX::SULD_2D_V2I16_CLAMP;
+    Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV2I32Clamp:
-    Opc = NVPTX::SULD_2D_V2I32_CLAMP;
+    Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV2I64Clamp:
-    Opc = NVPTX::SULD_2D_V2I64_CLAMP;
+    Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV4I8Clamp:
-    Opc = NVPTX::SULD_2D_V4I8_CLAMP;
+    Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV4I16Clamp:
-    Opc = NVPTX::SULD_2D_V4I16_CLAMP;
+    Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
     break;
   case NVPTXISD::Suld2DV4I32Clamp:
-    Opc = NVPTX::SULD_2D_V4I32_CLAMP;
+    Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayI8Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayI16Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayI32Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayI64Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I8Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I16Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I32Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I64Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV4I8Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV4I16Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
     break;
   case NVPTXISD::Suld2DArrayV4I32Clamp:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
     break;
   case NVPTXISD::Suld3DI8Clamp:
-    Opc = NVPTX::SULD_3D_I8_CLAMP;
+    Opc = NVPTX::SULD_3D_I8_CLAMP_R;
     break;
   case NVPTXISD::Suld3DI16Clamp:
-    Opc = NVPTX::SULD_3D_I16_CLAMP;
+    Opc = NVPTX::SULD_3D_I16_CLAMP_R;
     break;
   case NVPTXISD::Suld3DI32Clamp:
-    Opc = NVPTX::SULD_3D_I32_CLAMP;
+    Opc = NVPTX::SULD_3D_I32_CLAMP_R;
     break;
   case NVPTXISD::Suld3DI64Clamp:
-    Opc = NVPTX::SULD_3D_I64_CLAMP;
+    Opc = NVPTX::SULD_3D_I64_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV2I8Clamp:
-    Opc = NVPTX::SULD_3D_V2I8_CLAMP;
+    Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV2I16Clamp:
-    Opc = NVPTX::SULD_3D_V2I16_CLAMP;
+    Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV2I32Clamp:
-    Opc = NVPTX::SULD_3D_V2I32_CLAMP;
+    Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV2I64Clamp:
-    Opc = NVPTX::SULD_3D_V2I64_CLAMP;
+    Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV4I8Clamp:
-    Opc = NVPTX::SULD_3D_V4I8_CLAMP;
+    Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV4I16Clamp:
-    Opc = NVPTX::SULD_3D_V4I16_CLAMP;
+    Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
     break;
   case NVPTXISD::Suld3DV4I32Clamp:
-    Opc = NVPTX::SULD_3D_V4I32_CLAMP;
+    Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
     break;
   case NVPTXISD::Suld1DI8Trap:
-    Opc = NVPTX::SULD_1D_I8_TRAP;
+    Opc = NVPTX::SULD_1D_I8_TRAP_R;
     break;
   case NVPTXISD::Suld1DI16Trap:
-    Opc = NVPTX::SULD_1D_I16_TRAP;
+    Opc = NVPTX::SULD_1D_I16_TRAP_R;
     break;
   case NVPTXISD::Suld1DI32Trap:
-    Opc = NVPTX::SULD_1D_I32_TRAP;
+    Opc = NVPTX::SULD_1D_I32_TRAP_R;
     break;
   case NVPTXISD::Suld1DI64Trap:
-    Opc = NVPTX::SULD_1D_I64_TRAP;
+    Opc = NVPTX::SULD_1D_I64_TRAP_R;
     break;
   case NVPTXISD::Suld1DV2I8Trap:
-    Opc = NVPTX::SULD_1D_V2I8_TRAP;
+    Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
     break;
   case NVPTXISD::Suld1DV2I16Trap:
-    Opc = NVPTX::SULD_1D_V2I16_TRAP;
+    Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
     break;
   case NVPTXISD::Suld1DV2I32Trap:
-    Opc = NVPTX::SULD_1D_V2I32_TRAP;
+    Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
     break;
   case NVPTXISD::Suld1DV2I64Trap:
-    Opc = NVPTX::SULD_1D_V2I64_TRAP;
+    Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
     break;
   case NVPTXISD::Suld1DV4I8Trap:
-    Opc = NVPTX::SULD_1D_V4I8_TRAP;
+    Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
     break;
   case NVPTXISD::Suld1DV4I16Trap:
-    Opc = NVPTX::SULD_1D_V4I16_TRAP;
+    Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
     break;
   case NVPTXISD::Suld1DV4I32Trap:
-    Opc = NVPTX::SULD_1D_V4I32_TRAP;
+    Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayI8Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayI16Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayI32Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayI64Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I8Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I16Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I32Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV2I64Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV4I8Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV4I16Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
     break;
   case NVPTXISD::Suld1DArrayV4I32Trap:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
     break;
   case NVPTXISD::Suld2DI8Trap:
-    Opc = NVPTX::SULD_2D_I8_TRAP;
+    Opc = NVPTX::SULD_2D_I8_TRAP_R;
     break;
   case NVPTXISD::Suld2DI16Trap:
-    Opc = NVPTX::SULD_2D_I16_TRAP;
+    Opc = NVPTX::SULD_2D_I16_TRAP_R;
     break;
   case NVPTXISD::Suld2DI32Trap:
-    Opc = NVPTX::SULD_2D_I32_TRAP;
+    Opc = NVPTX::SULD_2D_I32_TRAP_R;
     break;
   case NVPTXISD::Suld2DI64Trap:
-    Opc = NVPTX::SULD_2D_I64_TRAP;
+    Opc = NVPTX::SULD_2D_I64_TRAP_R;
     break;
   case NVPTXISD::Suld2DV2I8Trap:
-    Opc = NVPTX::SULD_2D_V2I8_TRAP;
+    Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
     break;
   case NVPTXISD::Suld2DV2I16Trap:
-    Opc = NVPTX::SULD_2D_V2I16_TRAP;
+    Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
     break;
   case NVPTXISD::Suld2DV2I32Trap:
-    Opc = NVPTX::SULD_2D_V2I32_TRAP;
+    Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
     break;
   case NVPTXISD::Suld2DV2I64Trap:
-    Opc = NVPTX::SULD_2D_V2I64_TRAP;
+    Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
     break;
   case NVPTXISD::Suld2DV4I8Trap:
-    Opc = NVPTX::SULD_2D_V4I8_TRAP;
+    Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
     break;
   case NVPTXISD::Suld2DV4I16Trap:
-    Opc = NVPTX::SULD_2D_V4I16_TRAP;
+    Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
     break;
   case NVPTXISD::Suld2DV4I32Trap:
-    Opc = NVPTX::SULD_2D_V4I32_TRAP;
+    Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayI8Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayI16Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayI32Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayI64Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I8Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I16Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I32Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV2I64Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV4I8Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV4I16Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
     break;
   case NVPTXISD::Suld2DArrayV4I32Trap:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
     break;
   case NVPTXISD::Suld3DI8Trap:
-    Opc = NVPTX::SULD_3D_I8_TRAP;
+    Opc = NVPTX::SULD_3D_I8_TRAP_R;
     break;
   case NVPTXISD::Suld3DI16Trap:
-    Opc = NVPTX::SULD_3D_I16_TRAP;
+    Opc = NVPTX::SULD_3D_I16_TRAP_R;
     break;
   case NVPTXISD::Suld3DI32Trap:
-    Opc = NVPTX::SULD_3D_I32_TRAP;
+    Opc = NVPTX::SULD_3D_I32_TRAP_R;
     break;
   case NVPTXISD::Suld3DI64Trap:
-    Opc = NVPTX::SULD_3D_I64_TRAP;
+    Opc = NVPTX::SULD_3D_I64_TRAP_R;
     break;
   case NVPTXISD::Suld3DV2I8Trap:
-    Opc = NVPTX::SULD_3D_V2I8_TRAP;
+    Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
     break;
   case NVPTXISD::Suld3DV2I16Trap:
-    Opc = NVPTX::SULD_3D_V2I16_TRAP;
+    Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
     break;
   case NVPTXISD::Suld3DV2I32Trap:
-    Opc = NVPTX::SULD_3D_V2I32_TRAP;
+    Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
     break;
   case NVPTXISD::Suld3DV2I64Trap:
-    Opc = NVPTX::SULD_3D_V2I64_TRAP;
+    Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
     break;
   case NVPTXISD::Suld3DV4I8Trap:
-    Opc = NVPTX::SULD_3D_V4I8_TRAP;
+    Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
     break;
   case NVPTXISD::Suld3DV4I16Trap:
-    Opc = NVPTX::SULD_3D_V4I16_TRAP;
+    Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
     break;
   case NVPTXISD::Suld3DV4I32Trap:
-    Opc = NVPTX::SULD_3D_V4I32_TRAP;
+    Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
     break;
   case NVPTXISD::Suld1DI8Zero:
-    Opc = NVPTX::SULD_1D_I8_ZERO;
+    Opc = NVPTX::SULD_1D_I8_ZERO_R;
     break;
   case NVPTXISD::Suld1DI16Zero:
-    Opc = NVPTX::SULD_1D_I16_ZERO;
+    Opc = NVPTX::SULD_1D_I16_ZERO_R;
     break;
   case NVPTXISD::Suld1DI32Zero:
-    Opc = NVPTX::SULD_1D_I32_ZERO;
+    Opc = NVPTX::SULD_1D_I32_ZERO_R;
     break;
   case NVPTXISD::Suld1DI64Zero:
-    Opc = NVPTX::SULD_1D_I64_ZERO;
+    Opc = NVPTX::SULD_1D_I64_ZERO_R;
     break;
   case NVPTXISD::Suld1DV2I8Zero:
-    Opc = NVPTX::SULD_1D_V2I8_ZERO;
+    Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
     break;
   case NVPTXISD::Suld1DV2I16Zero:
-    Opc = NVPTX::SULD_1D_V2I16_ZERO;
+    Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
     break;
   case NVPTXISD::Suld1DV2I32Zero:
-    Opc = NVPTX::SULD_1D_V2I32_ZERO;
+    Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
     break;
   case NVPTXISD::Suld1DV2I64Zero:
-    Opc = NVPTX::SULD_1D_V2I64_ZERO;
+    Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
     break;
   case NVPTXISD::Suld1DV4I8Zero:
-    Opc = NVPTX::SULD_1D_V4I8_ZERO;
+    Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
     break;
   case NVPTXISD::Suld1DV4I16Zero:
-    Opc = NVPTX::SULD_1D_V4I16_ZERO;
+    Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
     break;
   case NVPTXISD::Suld1DV4I32Zero:
-    Opc = NVPTX::SULD_1D_V4I32_ZERO;
+    Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayI8Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayI16Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayI32Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayI64Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV2I8Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV2I16Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV2I32Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV2I64Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV4I8Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV4I16Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
     break;
   case NVPTXISD::Suld1DArrayV4I32Zero:
-    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
+    Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
     break;
   case NVPTXISD::Suld2DI8Zero:
-    Opc = NVPTX::SULD_2D_I8_ZERO;
+    Opc = NVPTX::SULD_2D_I8_ZERO_R;
     break;
   case NVPTXISD::Suld2DI16Zero:
-    Opc = NVPTX::SULD_2D_I16_ZERO;
+    Opc = NVPTX::SULD_2D_I16_ZERO_R;
     break;
   case NVPTXISD::Suld2DI32Zero:
-    Opc = NVPTX::SULD_2D_I32_ZERO;
+    Opc = NVPTX::SULD_2D_I32_ZERO_R;
     break;
   case NVPTXISD::Suld2DI64Zero:
-    Opc = NVPTX::SULD_2D_I64_ZERO;
+    Opc = NVPTX::SULD_2D_I64_ZERO_R;
     break;
   case NVPTXISD::Suld2DV2I8Zero:
-    Opc = NVPTX::SULD_2D_V2I8_ZERO;
+    Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
     break;
   case NVPTXISD::Suld2DV2I16Zero:
-    Opc = NVPTX::SULD_2D_V2I16_ZERO;
+    Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
     break;
   case NVPTXISD::Suld2DV2I32Zero:
-    Opc = NVPTX::SULD_2D_V2I32_ZERO;
+    Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
     break;
   case NVPTXISD::Suld2DV2I64Zero:
-    Opc = NVPTX::SULD_2D_V2I64_ZERO;
+    Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
     break;
   case NVPTXISD::Suld2DV4I8Zero:
-    Opc = NVPTX::SULD_2D_V4I8_ZERO;
+    Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
     break;
   case NVPTXISD::Suld2DV4I16Zero:
-    Opc = NVPTX::SULD_2D_V4I16_ZERO;
+    Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
     break;
   case NVPTXISD::Suld2DV4I32Zero:
-    Opc = NVPTX::SULD_2D_V4I32_ZERO;
+    Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayI8Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayI16Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayI32Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayI64Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV2I8Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV2I16Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV2I32Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV2I64Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV4I8Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV4I16Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
     break;
   case NVPTXISD::Suld2DArrayV4I32Zero:
-    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
+    Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
     break;
   case NVPTXISD::Suld3DI8Zero:
-    Opc = NVPTX::SULD_3D_I8_ZERO;
+    Opc = NVPTX::SULD_3D_I8_ZERO_R;
     break;
   case NVPTXISD::Suld3DI16Zero:
-    Opc = NVPTX::SULD_3D_I16_ZERO;
+    Opc = NVPTX::SULD_3D_I16_ZERO_R;
     break;
   case NVPTXISD::Suld3DI32Zero:
-    Opc = NVPTX::SULD_3D_I32_ZERO;
+    Opc = NVPTX::SULD_3D_I32_ZERO_R;
     break;
   case NVPTXISD::Suld3DI64Zero:
-    Opc = NVPTX::SULD_3D_I64_ZERO;
+    Opc = NVPTX::SULD_3D_I64_ZERO_R;
     break;
   case NVPTXISD::Suld3DV2I8Zero:
-    Opc = NVPTX::SULD_3D_V2I8_ZERO;
+    Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
     break;
   case NVPTXISD::Suld3DV2I16Zero:
-    Opc = NVPTX::SULD_3D_V2I16_ZERO;
+    Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
     break;
   case NVPTXISD::Suld3DV2I32Zero:
-    Opc = NVPTX::SULD_3D_V2I32_ZERO;
+    Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
     break;
   case NVPTXISD::Suld3DV2I64Zero:
-    Opc = NVPTX::SULD_3D_V2I64_ZERO;
+    Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
     break;
   case NVPTXISD::Suld3DV4I8Zero:
-    Opc = NVPTX::SULD_3D_V4I8_ZERO;
+    Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
     break;
   case NVPTXISD::Suld3DV4I16Zero:
-    Opc = NVPTX::SULD_3D_V4I16_ZERO;
+    Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
     break;
   case NVPTXISD::Suld3DV4I32Zero:
-    Opc = NVPTX::SULD_3D_V4I32_ZERO;
+    Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
     break;
   }
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 0735a0fbf5f3d..511cd875ac55c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2464,2303 +2464,1563 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
 // texmode_independent
 let IsTex = true, IsTexModeUnified = false in {
 // Texture fetch instructions using handles
-def TEX_1D_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
-              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
-              []>;
-def TEX_1D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
-              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
-              []>;
-def TEX_1D_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
-              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x\\}], $lod;",
-              []>;
-def TEX_1D_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_1D_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
-              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
-              []>;
-def TEX_1D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
-              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
-              []>;
-def TEX_1D_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x\\}], $lod;",
-              []>;
-def TEX_1D_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_1D_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
-              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
-              []>;
-def TEX_1D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
-              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
-              []>;
-def TEX_1D_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x\\}], $lod;",
-              []>;
-def TEX_1D_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-
-def TEX_1D_ARRAY_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}];",
-              []>;
-def TEX_1D_ARRAY_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
-              "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}];",
-              []>;
-def TEX_1D_ARRAY_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}], $lod;",
-              []>;
-def TEX_1D_ARRAY_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_1D_ARRAY_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}];",
-              []>;
-def TEX_1D_ARRAY_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
-              "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}];",
-              []>;
-def TEX_1D_ARRAY_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}], $lod;",
-              []>;
-def TEX_1D_ARRAY_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_1D_ARRAY_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}];",
-              []>;
-def TEX_1D_ARRAY_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
-              "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}];",
-              []>;
-def TEX_1D_ARRAY_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}], $lod;",
-              []>;
-def TEX_1D_ARRAY_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
 
-def TEX_2D_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TEX_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TEX_2D_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$lod),
-              "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}], $lod;",
-              []>;
-def TEX_2D_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_2D_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TEX_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TEX_2D_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$lod),
-              "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}], $lod;",
-              []>;
-def TEX_2D_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_2D_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TEX_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TEX_2D_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$lod),
-              "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}], $lod;",
-              []>;
-def TEX_2D_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
+class TEX_1D_base<string inst, NVPTXRegClass outtype,  // Common shape of a 1D texture fetch; `inst` is the leading asm text.
+                  NVPTXRegClass intype, dag texsamp>  // `texsamp` is expected to define $t and $s, which the asm string uses.
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // Four results ($r, $g, $b, $a), all of class `outtype`.
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x)),  // Inputs: the `texsamp` operands followed by $x of class `intype`.
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",  // `inst` followed by a fixed operand layout.
+                 []>;  // Empty pattern list.
+
+multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {  // Four TEX_1D_base records; suffix letters give the $t then $s operand kind.
+  def _RR : TEX_1D_base<inst, outtype, intype,  // $t: Int64Regs, $s: Int64Regs.
+                        (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_1D_base<inst, outtype, intype,  // $t: Int64Regs, $s: i64imm.
+                        (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_1D_base<inst, outtype, intype,  // $t: i64imm, $s: Int64Regs.
+                        (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_1D_base<inst, outtype, intype,  // $t: i64imm, $s: i64imm.
+                        (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_2D_ARRAY_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
-                   Int32Regs:$y),
-              "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_2D_ARRAY_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y),
-              "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_2D_ARRAY_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$lod),
-              "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
-              []>;
-def TEX_2D_ARRAY_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_2D_ARRAY_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
-                   Int32Regs:$y),
-              "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_2D_ARRAY_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y),
-              "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_2D_ARRAY_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$lod),
-              "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
-              []>;
-def TEX_2D_ARRAY_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_2D_ARRAY_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
-                   Int32Regs:$y),
-              "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_2D_ARRAY_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y),
-              "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_2D_ARRAY_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$lod),
-              "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
-              []>;
-def TEX_2D_ARRAY_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
+defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+                        NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
+                 []>;
+
+multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
+                        NVPTXRegClass intype> {
+  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_3D_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$z),
-              "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_3D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z),
-              "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_3D_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z, Float32Regs:$lod),
-              "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_3D_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$gradx2, Float32Regs:$grady0,
-                   Float32Regs:$grady1, Float32Regs:$grady2),
-              "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], "
-              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
-              "\\{$grady0, $grady1, $grady2, $grady2\\};",
-              []>;
-def TEX_3D_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$z),
-              "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_3D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z),
-              "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_3D_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z, Float32Regs:$lod),
-              "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_3D_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$gradx2, Float32Regs:$grady0,
-                   Float32Regs:$grady1, Float32Regs:$grady2),
-              "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], "
-              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
-              "\\{$grady0, $grady1, $grady2, $grady2\\};",
-              []>;
-def TEX_3D_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$z),
-              "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_3D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z),
-              "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_3D_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z, Float32Regs:$lod),
-              "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_3D_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$gradx2, Float32Regs:$grady0,
-                   Float32Regs:$grady1, Float32Regs:$grady2),
-              "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], "
-              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
-              "\\{$grady0, $grady1, $grady2, $grady2\\};",
-              []>;
+defm TEX_1D_F32_F32_LEVEL :
+  TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_LEVEL :
+  TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_LEVEL :
+  TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+                       NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
+                        " \\{$gradx\\}, \\{$grady\\};",
+                 []>;
+
+multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
+                       NVPTXRegClass intype> {
+  def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
+                             (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
+                             (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
+                             (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
+                             (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_CUBE_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s,
-               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_CUBE_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_CUBE_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_CUBE_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_CUBE_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_CUBE_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
+defm TEX_1D_F32_F32_GRAD
+  : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_GRAD
+  : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_GRAD
+  : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+                        NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
+                 []>;
+
+multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
+                        NVPTXRegClass intype> {
+  def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_CUBE_ARRAY_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
-               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $z\\}];",
-              []>;
-def TEX_CUBE_ARRAY_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
-              []>;
-def TEX_CUBE_ARRAY_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $z\\}];",
-              []>;
-def TEX_CUBE_ARRAY_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
-              []>;
-def TEX_CUBE_ARRAY_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $z\\}];",
-              []>;
-def TEX_CUBE_ARRAY_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
-              []>;
+defm TEX_1D_ARRAY_F32_F32
+  : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_F32_S32
+  : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_S32
+  : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_F32
+  : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_S32
+  : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_U32_F32
+  : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+                              NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$l, $x\\}], $lod;",
+                 []>;
+
+multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+                              NVPTXRegClass intype> {
+  def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                    (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                    (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                    (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                    (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TLD4_R_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_G_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_B_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_A_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_R_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_G_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_B_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_A_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_R_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_G_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_B_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
-def TLD4_A_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
-              "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, $s, \\{$x, $y\\}];",
-              []>;
+defm TEX_1D_ARRAY_F32_F32_LEVEL
+  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_LEVEL
+  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_LEVEL
+  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+                             NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x,
+                                    intype:$gradx, intype:$grady)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
+                        " \\{$gradx\\}, \\{$grady\\};",
+                 []>;
+
+multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+                             NVPTXRegClass intype> {
+  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+                                   (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+                                   (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+                                   (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+                                   (ins i64imm:$t, i64imm:$s)>;
 }
 
+defm TEX_1D_ARRAY_F32_F32_GRAD
+  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_GRAD
+  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_GRAD
+  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_base<string inst, NVPTXRegClass outtype,
+                  NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
+                 []>;
+
+multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+  def _RR : TEX_2D_base<inst, outtype, intype,
+                        (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
+}
 
-// texmode_unified
-let IsTex = true, IsTexModeUnified = true in {
-// Texture fetch instructions using handles
-def TEX_UNIFIED_1D_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x),
-              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
-              []>;
-def TEX_UNIFIED_1D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x),
-              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
-              []>;
-def TEX_UNIFIED_1D_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
-              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x\\}], $lod;",
-              []>;
-def TEX_UNIFIED_1D_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_UNIFIED_1D_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x),
-              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
-              []>;
-def TEX_UNIFIED_1D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x),
-              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
-              []>;
-def TEX_UNIFIED_1D_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x\\}], $lod;",
-              []>;
-def TEX_UNIFIED_1D_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_UNIFIED_1D_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x),
-              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
-              []>;
-def TEX_UNIFIED_1D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x),
-              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
-              []>;
-def TEX_UNIFIED_1D_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x\\}], $lod;",
-              []>;
-def TEX_UNIFIED_1D_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
+defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+                        NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$x, $y\\}], $lod;",
+                 []>;
+
+multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
+                        NVPTXRegClass intype> {
+  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_UNIFIED_1D_ARRAY_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
-              "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}];",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
-              "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}];",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}], $lod;",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
-              "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}];",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
-              "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}];",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}], $lod;",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
-              "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}];",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
-              "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}];",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$lod),
-              "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}], $lod;",
-              []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$gradx, Float32Regs:$grady),
-              "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
-              []>;
+defm TEX_2D_F32_F32_LEVEL :  // each defm expands to the _RR/_RI/_IR/_II defs of TEX_2D_LEVEL
+  TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_LEVEL :  // only Float32Regs coordinate variants are instantiated for LEVEL
+  TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_LEVEL :  // same register classes as the S32 variant; only the mnemonic differs
+  TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,  // 2D fetch taking two 2-element gradient groups; inst is the mnemonic prefix
+                       NVPTXRegClass intype, dag texsamp>  // texsamp must supply the $t and $s operands referenced in the asm string
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // four results, all of class outtype
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y,  // input list: texsamp operands first, then coords and gradients (all intype)
+                                    intype:$gradx0, intype:$gradx1,
+                                    intype:$grady0, intype:$grady1)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
+                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+                 []>;  // empty pattern list; presumably selected from C++ rather than by a TableGen pattern - confirm
+
+multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,  // emits four defs per defm: <name>_RR, _RI, _IR, _II
+                       NVPTXRegClass intype> {  // suffix letters: first is $t, second is $s; R = Int64Regs, I = i64imm
+  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,  // $t register, $s register
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,  // $t register, $s immediate
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,  // $t immediate, $s register
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_2D_GRAD_base<inst, outtype, intype,  // $t immediate, $s immediate
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_UNIFIED_2D_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
-              "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$lod),
-              "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}], $lod;",
-              []>;
-def TEX_UNIFIED_2D_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_UNIFIED_2D_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
-              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$lod),
-              "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}], $lod;",
-              []>;
-def TEX_UNIFIED_2D_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_UNIFIED_2D_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
-              "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$lod),
-              "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}], $lod;",
-              []>;
-def TEX_UNIFIED_2D_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
+defm TEX_2D_F32_F32_GRAD :  // each defm expands to the _RR/_RI/_IR/_II defs of TEX_2D_GRAD
+  TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_GRAD :  // only Float32Regs coordinate variants are instantiated for GRAD
+  TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_GRAD :  // same register classes as the S32 variant; only the mnemonic differs
+  TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,  // 2D array fetch; inst is the mnemonic prefix of the asm string
+                        NVPTXRegClass intype, dag texsamp>  // texsamp must supply the $t and $s operands referenced in the asm string
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // four results, all of class outtype
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),  // $l is always Int32Regs, independent of intype
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$l, $x, $y, $y\\}];",  // $y is printed twice; presumably pads the vector to four elements - confirm against the PTX ISA
+                 []>;  // empty pattern list; presumably selected from C++ rather than by a TableGen pattern - confirm
+
+multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,  // emits four defs per defm: <name>_RR, _RI, _IR, _II
+                        NVPTXRegClass intype> {  // suffix letters: first is $t, second is $s; R = Int64Regs, I = i64imm
+  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,  // $t register, $s register
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,  // $t register, $s immediate
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,  // $t immediate, $s register
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,  // $t immediate, $s immediate
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_UNIFIED_2D_ARRAY_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
-                   Int32Regs:$y),
-              "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y),
-              "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$lod),
-              "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}], $lod;",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
-                   Int32Regs:$y),
-              "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y),
-              "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$lod),
-              "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}], $lod;",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
-                   Int32Regs:$y),
-              "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y),
-              "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}];",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y, Float32Regs:$lod),
-              "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}], $lod;",
-              []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
-                   Float32Regs:$y,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$grady0, Float32Regs:$grady1),
-              "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
-              "\\{$grady0, $grady1\\};",
-              []>;
+defm TEX_2D_ARRAY_F32_F32  // each defm expands to the _RR/_RI/_IR/_II defs of TEX_2D_ARRAY
+  : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_F32_S32  // args: mnemonic, result class (outtype), coordinate class (intype)
+  : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_S32
+  : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_F32
+  : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_S32  // U32 variants pass the same Int32Regs class as S32; only the mnemonic differs
+  : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_U32_F32
+  : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,  // 2D array fetch with an extra $lod operand; inst is the mnemonic prefix
+                              NVPTXRegClass intype, dag texsamp>  // texsamp must supply the $t and $s operands referenced in the asm string
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // four results, all of class outtype
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,  // $l is always Int32Regs; x, y and lod use intype
+                                    intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",  // $y is printed twice; presumably pads the vector to four elements - confirm against the PTX ISA
+                 []>;  // empty pattern list; presumably selected from C++ rather than by a TableGen pattern - confirm
+
+multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,  // emits four defs per defm: <name>_RR, _RI, _IR, _II
+                              NVPTXRegClass intype> {  // suffix letters: first is $t, second is $s; R = Int64Regs, I = i64imm
+  def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,  // $t register, $s register
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,  // $t register, $s immediate
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,  // $t immediate, $s register
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,  // $t immediate, $s immediate
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_UNIFIED_3D_F32_S32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$z),
-              "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_3D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z),
-              "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_3D_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z, Float32Regs:$lod),
-              "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_3D_F32_F32_GRAD
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$gradx2, Float32Regs:$grady0,
-                   Float32Regs:$grady1, Float32Regs:$grady2),
-              "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], "
-              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
-              "\\{$grady0, $grady1, $grady2, $grady2\\};",
-              []>;
-def TEX_UNIFIED_3D_S32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$z),
-              "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_3D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z),
-              "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_3D_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z, Float32Regs:$lod),
-              "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_3D_S32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$gradx2, Float32Regs:$grady0,
-                   Float32Regs:$grady1, Float32Regs:$grady2),
-              "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], "
-              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
-              "\\{$grady0, $grady1, $grady2, $grady2\\};",
-              []>;
-def TEX_UNIFIED_3D_U32_S32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$z),
-              "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_3D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z),
-              "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_3D_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z, Float32Regs:$lod),
-              "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_3D_U32_F32_GRAD
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
-                   Float32Regs:$z,
-                   Float32Regs:$gradx0, Float32Regs:$gradx1,
-                   Float32Regs:$gradx2, Float32Regs:$grady0,
-                   Float32Regs:$grady1, Float32Regs:$grady2),
-              "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], "
-              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
-              "\\{$grady0, $grady1, $grady2, $grady2\\};",
-              []>;
+defm TEX_2D_ARRAY_F32_F32_LEVEL  // each defm expands to the _RR/_RI/_IR/_II defs of TEX_2D_ARRAY_LEVEL
+  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_LEVEL  // only Float32Regs coordinate variants are instantiated for LEVEL
+  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_LEVEL  // same register classes as the S32 variant; only the mnemonic differs
+  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,  // 2D array fetch taking two 2-element gradient groups; inst is the mnemonic prefix
+                             NVPTXRegClass intype, dag texsamp>  // texsamp must supply the $t and $s operands referenced in the asm string
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // four results, all of class outtype
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,  // $l is always Int32Regs; coords and gradients use intype
+                                    intype:$gradx0, intype:$gradx1,
+                                    intype:$grady0, intype:$grady1)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$l, $x, $y, $y\\}],"  // $y is printed twice; presumably pads the vector to four elements - confirm against the PTX ISA
+                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+                 []>;  // empty pattern list; presumably selected from C++ rather than by a TableGen pattern - confirm
+
+multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,  // emits four defs per defm: <name>_RR, _RI, _IR, _II
+                             NVPTXRegClass intype> {  // suffix letters: first is $t, second is $s; R = Int64Regs, I = i64imm
+  def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,  // $t register, $s register
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,  // $t register, $s immediate
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,  // $t immediate, $s register
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,  // $t immediate, $s immediate
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_UNIFIED_CUBE_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t,
-               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_CUBE_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_CUBE_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_CUBE_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_CUBE_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}];",
-              []>;
-def TEX_UNIFIED_CUBE_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$x, $y, $z, $z\\}], $lod;",
-              []>;
+defm TEX_2D_ARRAY_F32_F32_GRAD  // each defm expands to the _RR/_RI/_IR/_II defs of TEX_2D_ARRAY_GRAD
+  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_GRAD  // only Float32Regs coordinate variants are instantiated for GRAD
+  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_GRAD  // same register classes as the S32 variant; only the mnemonic differs
+  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_base<string inst, NVPTXRegClass outtype,  // 3D fetch; inst is the mnemonic prefix of the asm string
+                  NVPTXRegClass intype, dag texsamp>  // texsamp must supply the $t and $s operands referenced in the asm string
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // four results, all of class outtype
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),  // input list: texsamp operands first, then x, y, z (all intype)
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$x, $y, $z, $z\\}];",  // $z is printed twice; presumably pads the vector to four elements - confirm against the PTX ISA
+                 []>;  // empty pattern list; presumably selected from C++ rather than by a TableGen pattern - confirm
+
+multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {  // emits <name>_RR/_RI/_IR/_II; first letter is $t, second is $s; R = Int64Regs, I = i64imm
+  def _RR : TEX_3D_base<inst, outtype, intype,  // $t register, $s register
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_3D_base<inst, outtype, intype,  // $t register, $s immediate
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_3D_base<inst, outtype, intype,  // $t immediate, $s register
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_3D_base<inst, outtype, intype,  // $t immediate, $s immediate
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l,
-               Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $z\\}];",
-              []>;
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
-  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
-                    Float32Regs:$b, Float32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $z\\}];",
-              []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
-              []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
-              "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $z\\}];",
-              []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
-                    Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$t, Int32Regs:$l,
-                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
-                   Float32Regs:$lod),
-              "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
-              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
-              []>;
+defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;  // args: mnemonic, result class (outtype), coordinate class (intype)
+defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;  // U32 variants pass the same Int32Regs class as S32; only the mnemonic differs
+defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,  // 3D fetch with an extra $lod operand; inst is the mnemonic prefix
+                        NVPTXRegClass intype, dag texsamp>  // texsamp must supply the $t and $s operands referenced in the asm string
+    : NVPTXInst<(outs outtype:$r, outtype:$g,  // four results, all of class outtype
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,  // input list: texsamp operands first, then x, y, z, lod (all intype)
+                                    intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",  // $z is printed twice; presumably pads the vector to four elements - confirm against the PTX ISA
+                 []>;  // empty pattern list; presumably selected from C++ rather than by a TableGen pattern - confirm
+
+multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,  // emits four defs per defm: <name>_RR, _RI, _IR, _II
+                        NVPTXRegClass intype> {  // suffix letters: first is $t, second is $s; R = Int64Regs, I = i64imm
+  def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,  // $t register, $s register
+                              (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,  // $t register, $s immediate
+                              (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,  // $t immediate, $s register
+                              (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_3D_LEVEL_base<inst, outtype, intype,  // $t immediate, $s immediate
+                              (ins i64imm:$t, i64imm:$s)>;
+}
 
-def TLD4_UNIFIED_R_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_G_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_B_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_A_2D_F32_F32
-  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
-                    Float32Regs:$v2, Float32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_R_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_G_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_B_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_A_2D_S32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_R_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_G_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_B_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
-def TLD4_UNIFIED_A_2D_U32_F32
-  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
-                    Int32Regs:$v2, Int32Regs:$v3),
-              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
-              "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
-              "[$t, \\{$x, $y\\}];",
-              []>;
+defm TEX_3D_F32_F32_LEVEL
+  : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_LEVEL
+  : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_LEVEL
+  : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+                       NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+                                    intype :$gradx0, intype:$gradx1,
+                                    intype:$gradx2, intype:$grady0,
+                                    intype:$grady1, intype:$grady2)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$x, $y, $z, $z\\}],"
+                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
+                 []>;
+
+multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
+                       NVPTXRegClass intype> {
+  def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
+                             (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
+                             (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
+                             (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_3D_GRAD_base<inst, outtype, intype,
+                             (ins i64imm:$t, i64imm:$s)>;
 }
 
+defm TEX_3D_F32_F32_GRAD
+  : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_GRAD
+  : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_GRAD
+  : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
+                    NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$x, $y, $z, $z\\}];",
+                 []>;
+
+multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+  def _RR : TEX_CUBE_base<inst, outtype, intype,
+                          (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_CUBE_base<inst, outtype, intype,
+                          (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_CUBE_base<inst, outtype, intype,
+                          (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_CUBE_base<inst, outtype, intype,
+                          (ins i64imm:$t, i64imm:$s)>;
+}
 
+defm TEX_CUBE_F32_F32
+  : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32
+  : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32
+  : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+                                    intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+                 []>;
+
+multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype> {
+  def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+                                (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+                                (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+                                (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+                                (ins i64imm:$t, i64imm:$s)>;
+}
 
-//=== Surface load instructions
-// .clamp variant
-let IsSuld = true in {
-def SULD_1D_I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
+defm TEX_CUBE_F32_F32_LEVEL
+  : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32_LEVEL
+  : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32_LEVEL
+  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+                                    intype:$z)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$l, $x, $y, $z\\}];",
+                 []>;
+
+multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype> {
+  def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+                                (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+                                (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+                                (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+                                (ins i64imm:$t, i64imm:$s)>;
+}
 
-def SULD_1D_ARRAY_I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
+defm TEX_CUBE_ARRAY_F32_F32
+  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32
+  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32
+  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+                                    intype:$z, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+                 []>;
+
+multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype> {
+  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+                                      (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+                                      (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+                                      (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+                                      (ins i64imm:$t, i64imm:$s)>;
+}
 
-def SULD_2D_I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm TEX_CUBE_ARRAY_F32_F32_LEVEL
+  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+                         Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32_LEVEL
+  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+                         Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32_LEVEL
+  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+                         Int32Regs, Float32Regs>;
+
+class TLD4_2D_base<string inst, NVPTXRegClass outtype,
+                   NVPTXRegClass intype, dag texsamp>
+    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+                      outtype:$v2, outtype:$v3),
+                 !con(texsamp, (ins intype:$x, intype:$y)),
+                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
+                 []>;
+
+multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+  def _RR : TLD4_2D_base<inst, outtype, intype,
+                         (ins Int64Regs:$t, Int64Regs:$s)>;
+  def _RI : TLD4_2D_base<inst, outtype, intype,
+                         (ins Int64Regs:$t, i64imm:$s)>;
+  def _IR : TLD4_2D_base<inst, outtype, intype,
+                         (ins i64imm:$t, Int64Regs:$s)>;
+  def _II : TLD4_2D_base<inst, outtype, intype,
+                         (ins i64imm:$t, i64imm:$s)>;
+}
 
-def SULD_2D_ARRAY_I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm TLD4_R_2D_F32_F32
+  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_G_2D_F32_F32
+  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_B_2D_F32_F32
+  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_A_2D_F32_F32
+  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_R_2D_S32_F32
+  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_S32_F32
+  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_S32_F32
+  : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_S32_F32
+  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_R_2D_U32_F32
+  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_U32_F32
+  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_U32_F32
+  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_U32_F32
+  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
 
-def SULD_3D_I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
 }
 
-let IsSuld = 2 in {
-def SULD_1D_V2I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
 
-def SULD_1D_ARRAY_V2I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
+// texmode_unified
+let IsTex = true, IsTexModeUnified = true in {
+// Texture fetch instructions using handles
 
-def SULD_2D_V2I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
+class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+                 []>;
+
+multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_2D_ARRAY_V2I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm TEX_UNIFIED_1D_F32_S32
+  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_F32_F32
+  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_S32
+  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_S32_F32
+  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_S32
+  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_U32_F32
+  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
+                 []>;
+
+multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_3D_V2I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I64_CLAMP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
+defm TEX_UNIFIED_1D_F32_F32_LEVEL
+  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_LEVEL
+  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_LEVEL
+  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+                               NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+                 []>;
+
+multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
+                               NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
 }
 
-let IsSuld = 3 in {
-def SULD_1D_V4I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V4I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V4I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
+defm TEX_UNIFIED_1D_F32_F32_GRAD
+  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_GRAD
+  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_GRAD
+  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
+                 []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_1D_ARRAY_V4I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V4I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V4I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_S32
+  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32
+  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_S32
+  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32
+  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_S32
+  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32
+  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                      NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
+                 []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+                                      NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                           (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                           (ins i64imm:$t)>;
+}
 
-def SULD_2D_V4I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V4I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V4I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
+  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
+                               Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
+  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
+                               Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
+  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
+                               Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+                                     NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x,
+                                intype:$gradx, intype:$grady)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+                 []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+                                     NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+                                          (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+                                          (ins i64imm:$t)>;
+}
 
-def SULD_2D_ARRAY_V4I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V4I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V4I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
+  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
+                              Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
+  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
+                              Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
+  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
+                              Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
+                 []>;
+
+multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
+defm TEX_UNIFIED_2D_F32_S32
+  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_F32_F32
+  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_S32
+  : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_S32_F32
+  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_S32
+  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_U32_F32
+  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
+                 []>;
+
+multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_3D_V4I8_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V4I16_CLAMP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V4I32_CLAMP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
+defm TEX_UNIFIED_2D_F32_F32_LEVEL
+  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_LEVEL
+  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_LEVEL
+  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+                               NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y,
+                                intype:$gradx0, intype:$gradx1,
+                                intype:$grady0, intype:$grady1)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
+                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+                 []>;
+multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
+                               NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
 }
 
+defm TEX_UNIFIED_2D_F32_F32_GRAD
+  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_GRAD
+  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_GRAD
+  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
+                 []>;
+multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-// .trap variant
-let IsSuld = true in {
-def SULD_1D_I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_S32
+  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32
+  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_S32
+  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32
+  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_S32
+  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32
+  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                      NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+                                intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
+                 []>;
+multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+                                      NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                           (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+                                           (ins i64imm:$t)>;
+}
 
-def SULD_1D_ARRAY_I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
+  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
+                               Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
+  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
+                               Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
+  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
+                               Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+                                     NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+                                intype:$gradx0, intype:$gradx1,
+                                intype:$grady0, intype:$grady1)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
+                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+                 []>;
+multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+                                     NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+                                          (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+                                          (ins i64imm:$t)>;
+}
 
-def SULD_2D_I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
+  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
+                              Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
+  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
+                              Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
+  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
+                              Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+                 []>;
+multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
+                          NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_2D_ARRAY_I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm TEX_UNIFIED_3D_F32_S32
+  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_F32_F32
+  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_S32
+  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_S32_F32
+  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_S32
+  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_U32_F32
+  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+                 []>;
+multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
+                                NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_3D_I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
+defm TEX_UNIFIED_3D_F32_F32_LEVEL
+  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_LEVEL
+  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_LEVEL
+  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+                               NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y, intype:$z,
+                                intype:$gradx0, intype:$gradx1,
+                                intype:$gradx2, intype:$grady0,
+                                intype:$grady1, intype:$grady2)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
+                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
+                 []>;
+multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
+                               NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
 }
 
-let IsSuld = 2 in {
-def SULD_1D_V2I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
+defm TEX_UNIFIED_3D_F32_F32_GRAD
+  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_GRAD
+  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_GRAD
+  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
+                            NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+                 []>;
+multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
+                            NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
 
-def SULD_1D_ARRAY_V2I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
+defm TEX_UNIFIED_CUBE_F32_F32
+  : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32
+  : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32
+  : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                  NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+                 []>;
+multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+                                  NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+                                       (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+                                       (ins i64imm:$t)>;
+}
 
-def SULD_2D_V2I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
+  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
+                           Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
+  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
+                           Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
+  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
+                           Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+                                  NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
+                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
+                 []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+                                  NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+                                       (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+                                       (ins i64imm:$t)>;
+}
 
-def SULD_2D_ARRAY_V2I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
+  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
+  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
+  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+                                        NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$r, outtype:$g,
+                      outtype:$b, outtype:$a),
+                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
+                                intype:$lod)),
+                 inst # " \t\\{$r, $g, $b, $a\\},"
+                        " [$t, \\{$l, $x, $y, $z\\}], $lod;",
+                 []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+                                        NVPTXRegClass intype> {
+  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+                                             (ins Int64Regs:$t)>;
+  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+                                             (ins i64imm:$t)>;
+}
 
-def SULD_3D_V2I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I64_TRAP
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
+  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+                                 Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
+  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+                                 Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
+  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+                                 Int32Regs, Float32Regs>;
+
+class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+                           NVPTXRegClass intype, dag tex>
+    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+                      outtype:$v2, outtype:$v3),
+                 !con(tex, (ins intype:$x, intype:$y)),
+                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
+                 []>;
+multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+                           NVPTXRegClass intype> {
+  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
 }
 
-let IsSuld = 3 in {
-def SULD_1D_V4I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V4I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V4I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
+defm TLD4_UNIFIED_R_2D_F32_F32
+  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_F32_F32
+  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_F32_F32
+  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_F32_F32
+  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_S32_F32
+  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_S32_F32
+  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_S32_F32
+  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_S32_F32
+  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_U32_F32
+  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_U32_F32
+  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_U32_F32
+  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_U32_F32
+  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
 
-def SULD_1D_ARRAY_V4I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V4I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V4I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
+}
 
-def SULD_2D_V4I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V4I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V4I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
 
-def SULD_2D_ARRAY_V4I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V4I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V4I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
 
+//=== Surface load instructions
 
-def SULD_3D_V4I8_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V4I16_TRAP
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V4I32_TRAP
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
+let IsSuld = true in {
+
+class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r),
+                !con(surf, (ins Int32Regs:$x)),
+                inst # " \\{$r\\}, [$s, \\{$x\\}];",
+                []>;
+multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
 }
 
-// .zero variant
-let IsSuld = true in {
-def SULD_1D_I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
-              []>;
+defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
+defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
+defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
+defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
+defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
+defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
+defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
+defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
+defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
+defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r),
+                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+                inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
+                []>;
+multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_1D_ARRAY_I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
-              []>;
+defm SULD_1D_ARRAY_I8_CLAMP
+  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I16_CLAMP
+  : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I32_CLAMP
+  : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_I64_CLAMP
+  : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_TRAP
+  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I16_TRAP
+  : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I32_TRAP
+  : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_I64_TRAP
+  : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_ZERO
+  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I16_ZERO
+  : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I32_ZERO
+  : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_I64_ZERO
+  : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
+
+class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r),
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+                inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
+                []>;
+multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_2D_I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
+defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
+defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
+defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
+defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
+defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
+defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
+defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
+defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
+defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r),
+                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+                inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+                []>;
+multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_2D_ARRAY_I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
+
+class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r),
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+                inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+                []>;
+multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_3D_I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
+defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
+defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
+defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
+defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
+
+defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
+defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
+defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
+defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
+
+defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
+defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
+defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
+defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
 }
 
 let IsSuld = 2 in {
-def SULD_1D_V2I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V2I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
-              []>;
 
-def SULD_1D_ARRAY_V2I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V2I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
-              []>;
+class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g),
+                !con(surf, (ins Int32Regs:$x)),
+                inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
+                []>;
+multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_2D_V2I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V2I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g),
+                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+                inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+                []>;
+multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_2D_ARRAY_V2I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V2I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm SULD_1D_ARRAY_V2I8_CLAMP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_CLAMP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_CLAMP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_CLAMP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_TRAP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_TRAP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_TRAP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_TRAP
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_ZERO
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_ZERO
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_ZERO
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_ZERO
+  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
+
+class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g),
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+                inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+                []>;
+multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_V2I8_CLAMP
+  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_V2I16_CLAMP
+  : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_V2I32_CLAMP
+  : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_V2I64_CLAMP
+  : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_V2I8_TRAP
+  : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_V2I16_TRAP
+  : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_V2I32_TRAP
+  : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_V2I64_TRAP
+  : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_V2I8_ZERO
+  : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_V2I16_ZERO
+  : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_V2I32_ZERO
+  : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_V2I64_ZERO
+  : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g),
+                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
+                []>;
+multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_ARRAY_V2I8_CLAMP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_CLAMP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_CLAMP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_CLAMP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_TRAP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_TRAP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_TRAP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_TRAP
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_ZERO
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_ZERO
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_ZERO
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_ZERO
+  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
+
+class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g),
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+                []>;
+multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
+defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
+defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
+defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
+defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
+defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
+defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
+defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
+defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
+defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
 
-def SULD_3D_V2I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V2I64_ZERO
-  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
-              []>;
 }
 
 let IsSuld = 3 in {
-def SULD_1D_V4I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V4I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
-def SULD_1D_V4I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x),
-              "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
-              []>;
 
-def SULD_1D_ARRAY_V4I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V4I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
-def SULD_1D_ARRAY_V4I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
-              "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x\\}];",
-              []>;
+class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+                !con(surf, (ins Int32Regs:$x)),
+                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+                []>;
+multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_2D_V4I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V4I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
-def SULD_2D_V4I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
-              []>;
+defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
+
+class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
+                []>;
+multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
 
-def SULD_2D_ARRAY_V4I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V4I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
-def SULD_2D_ARRAY_V4I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
-              "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$l, $x, $y, $y\\}];",
-              []>;
+defm SULD_1D_ARRAY_V4I8_CLAMP
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_CLAMP
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_CLAMP
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_TRAP
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_TRAP
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_TRAP
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_ZERO
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_ZERO
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_ZERO
+  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
+
+class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+                []>;
+multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
 
+defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
+
+class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
+                []>;
+multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_ARRAY_V4I8_CLAMP
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_CLAMP
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_CLAMP
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_TRAP
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_TRAP
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_TRAP
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_ZERO
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_ZERO
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_ZERO
+  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
+
+class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
+                []>;
+multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
+  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
+defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
+defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
+defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
+defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
+
+defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
+defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
+defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
 
-def SULD_3D_V4I8_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V4I16_ZERO
-  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
-def SULD_3D_V4I32_ZERO
-  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
-              "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
-              "[$s, \\{$x, $y, $z, $z\\}];",
-              []>;
 }
 
 //-----------------------------------
@@ -4768,56 +4028,88 @@ def SULD_3D_V4I32_ZERO
 //-----------------------------------
 
 let IsSurfTexQuery = true in {
-def TXQ_CHANNEL_ORDER
+def TXQ_CHANNEL_ORDER_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.channel_order.b32 \t$d, [$a];",
               []>;
-def TXQ_CHANNEL_DATA_TYPE
+def TXQ_CHANNEL_ORDER_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.channel_order.b32 \t$d, [$a];",
+              []>;
+def TXQ_CHANNEL_DATA_TYPE_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.channel_data_type.b32 \t$d, [$a];",
               []>;
-def TXQ_WIDTH
+def TXQ_CHANNEL_DATA_TYPE_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.channel_data_type.b32 \t$d, [$a];",
+              []>;
+def TXQ_WIDTH_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.width.b32 \t$d, [$a];",
               []>;
-def TXQ_HEIGHT
+def TXQ_WIDTH_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.width.b32 \t$d, [$a];",
+              []>;
+def TXQ_HEIGHT_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.height.b32 \t$d, [$a];",
               []>;
-def TXQ_DEPTH
+def TXQ_HEIGHT_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.height.b32 \t$d, [$a];",
+              []>;
+def TXQ_DEPTH_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.depth.b32 \t$d, [$a];",
               []>;
-def TXQ_ARRAY_SIZE
+def TXQ_DEPTH_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.depth.b32 \t$d, [$a];",
+              []>;
+def TXQ_ARRAY_SIZE_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.array_size.b32 \t$d, [$a];",
               []>;
-def TXQ_NUM_SAMPLES
+def TXQ_ARRAY_SIZE_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.array_size.b32 \t$d, [$a];",
+              []>;
+def TXQ_NUM_SAMPLES_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.num_samples.b32 \t$d, [$a];",
               []>;
-def TXQ_NUM_MIPMAP_LEVELS
+def TXQ_NUM_SAMPLES_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.num_samples.b32 \t$d, [$a];",
+              []>;
+def TXQ_NUM_MIPMAP_LEVELS_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "txq.num_mipmap_levels.b32 \t$d, [$a];",
               []>;
+def TXQ_NUM_MIPMAP_LEVELS_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "txq.num_mipmap_levels.b32 \t$d, [$a];",
+              []>;
 }
 
 def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
-          (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
-          (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_width Int64Regs:$a),
-          (TXQ_WIDTH Int64Regs:$a)>;
+          (TXQ_WIDTH_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_height Int64Regs:$a),
-          (TXQ_HEIGHT Int64Regs:$a)>;
+          (TXQ_HEIGHT_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
-          (TXQ_DEPTH Int64Regs:$a)>;
+          (TXQ_DEPTH_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
-          (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
-          (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
-          (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
+          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
 
 
 //-----------------------------------
@@ -4825,44 +4117,68 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
 //-----------------------------------
 
 let IsSurfTexQuery = true in {
-def SUQ_CHANNEL_ORDER
+def SUQ_CHANNEL_ORDER_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "suq.channel_order.b32 \t$d, [$a];",
               []>;
-def SUQ_CHANNEL_DATA_TYPE
+def SUQ_CHANNEL_ORDER_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "suq.channel_order.b32 \t$d, [$a];",
+              []>;
+def SUQ_CHANNEL_DATA_TYPE_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "suq.channel_data_type.b32 \t$d, [$a];",
               []>;
-def SUQ_WIDTH
+def SUQ_CHANNEL_DATA_TYPE_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "suq.channel_data_type.b32 \t$d, [$a];",
+              []>;
+def SUQ_WIDTH_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "suq.width.b32 \t$d, [$a];",
               []>;
-def SUQ_HEIGHT
+def SUQ_WIDTH_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "suq.width.b32 \t$d, [$a];",
+              []>;
+def SUQ_HEIGHT_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "suq.height.b32 \t$d, [$a];",
               []>;
-def SUQ_DEPTH
+def SUQ_HEIGHT_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "suq.height.b32 \t$d, [$a];",
+              []>;
+def SUQ_DEPTH_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "suq.depth.b32 \t$d, [$a];",
               []>;
-def SUQ_ARRAY_SIZE
+def SUQ_DEPTH_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "suq.depth.b32 \t$d, [$a];",
+              []>;
+def SUQ_ARRAY_SIZE_R
   : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
               "suq.array_size.b32 \t$d, [$a];",
               []>;
+def SUQ_ARRAY_SIZE_I
+  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+              "suq.array_size.b32 \t$d, [$a];",
+              []>;
 }
 
 def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
-          (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
-          (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_suq_width Int64Regs:$a),
-          (SUQ_WIDTH Int64Regs:$a)>;
+          (SUQ_WIDTH_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_suq_height Int64Regs:$a),
-          (SUQ_HEIGHT Int64Regs:$a)>;
+          (SUQ_HEIGHT_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
-          (SUQ_DEPTH Int64Regs:$a)>;
+          (SUQ_DEPTH_R Int64Regs:$a)>;
 def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
-          (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
 
 
 //===- Handle Query -------------------------------------------------------===//
@@ -4884,1329 +4200,522 @@ def ISTYPEP_TEXTURE
 //===- Surface Stores -----------------------------------------------------===//
 
 let IsSust = true in {
-// Unformatted
-// .clamp variant
-def SUST_B_1D_B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-              "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
-              "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_V2B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-              "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-              "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V4B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_V4B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_V4B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
-                   Int32Regs:$b, Int32Regs:$a),
-              "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_1D_ARRAY_B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
-              "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
-              "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
-                   Int64Regs:$g),
-              "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_2D_B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-              "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-              "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_V2B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
-                   Int64Regs:$g),
-              "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V4B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_V4B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_V4B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_2D_ARRAY_B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r),
-              "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int64Regs:$r),
-              "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-             "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g),
-             "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int64Regs:$r, Int64Regs:$g),
-             "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-      "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-      "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-     "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-     "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_3D_B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r),
-              "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int64Regs:$r),
-              "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_V2B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g),
-              "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B64_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int64Regs:$r, Int64Regs:$g),
-              "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V4B8_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-         "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-         "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_3D_V4B16_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-        "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_3D_V4B32_CLAMP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-        "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-// .trap variant
-def SUST_B_1D_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-              "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
-              "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-              "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-              "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
-                   Int32Regs:$b, Int32Regs:$a),
-              "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_1D_ARRAY_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
-              "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
-              "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
-                   Int64Regs:$g),
-              "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_2D_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-              "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-              "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
-                   Int64Regs:$g),
-              "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_2D_ARRAY_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r),
-              "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int64Regs:$r),
-              "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-             "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g),
-             "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int64Regs:$r, Int64Regs:$g),
-             "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-      "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-      "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-     "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-     "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-def SUST_B_3D_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r),
-              "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int64Regs:$r),
-              "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g),
-              "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B64_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int64Regs:$r, Int64Regs:$g),
-              "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-         "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-         "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_3D_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-        "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_3D_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-        "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
-
-
-// .zero variant
-def SUST_B_1D_B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-              "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
-              "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_V2B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-              "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V2B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-              "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_V4B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_V4B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_V4B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
-                   Int32Regs:$b, Int32Regs:$a),
-              "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
 
+class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>  // 1D surface store with one data register; `surf` supplies the $s operand.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$x, intype:$r)),  // Inputs: $s (from surf), then coordinate $x and data $r.
+                inst # " \t[$s, \\{$x\\}], \\{$r\\};",  // Asm text: mnemonic `inst`, then [surface, {x}], {data}.
+                []>;
+multiclass SUST_1D<string inst, NVPTXRegClass intype> {  // Emits one instruction per kind of surface operand.
+  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;  // $s held in a 64-bit register.
+  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;  // $s as an immediate. NOTE(review): the commit log says i16imm, this uses i64imm -- confirm.
+}
 
-def SUST_B_1D_ARRAY_B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
-              "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
-              "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V2B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
-                   Int64Regs:$g),
-              "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_1D_ARRAY_V4B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;  // sust.b 1D, .clamp group; each defm yields <name>_R and <name>_I.
+defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;  // b8 and b16 data both use Int16Regs.
+defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;  // sust.b 1D, .trap group.
+defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;  // sust.b 1D, .zero group.
+defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;  // sust.p 1D: this list has only .trap entries and no b64.
+defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
+
+class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>  // 1D surface store of two data registers; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),  // Inputs: $s, coordinate $x, data $r and $g (same register class).
+                inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
+                []>;
+multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
+defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;  // sust.b 1D .v2, .clamp group; each defm yields <name>_R and <name>_I.
+defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;  // sust.b 1D .v2, .trap group.
+defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;  // sust.b 1D .v2, .zero group.
+defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;  // sust.p 1D .v2: only .trap entries, no b64.
+defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>  // 1D surface store of four data registers; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
+                                intype:$b, intype:$a)),  // Inputs: $s, coordinate $x, data $r/$g/$b/$a (same register class).
+                inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+                []>;
+multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
-def SUST_B_2D_B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-              "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-              "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_V2B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V2B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
-                   Int64Regs:$g),
-              "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_B_2D_V4B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_V4B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_V4B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;  // sust.b 1D .v4, .clamp group; the v4 lists stop at b32 (no b64 entry).
+defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;  // sust.b 1D .v4, .trap group.
+defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;  // sust.b 1D .v4, .zero group.
+defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;  // sust.p 1D .v4: only .trap entries.
+defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>  // 1D-array surface store with one data register; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),  // Inputs: $s, then $idx and $x, then data $r.
+                inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",  // $idx is printed before $x in the coordinate list.
+                []>;
+multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
+defm SUST_B_1D_ARRAY_B8_CLAMP  // sust.b a1d, .clamp group; each defm yields <name>_R and <name>_I.
+  : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_CLAMP
+  : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_CLAMP
+  : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_CLAMP
+  : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_TRAP  // sust.b a1d, .trap group.
+  : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_TRAP
+  : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_TRAP
+  : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_TRAP
+  : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_ZERO  // sust.b a1d, .zero group.
+  : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_ZERO
+  : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_ZERO
+  : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_ZERO
+  : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_B8_TRAP  // sust.p a1d: only .trap entries, no b64.
+  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B16_TRAP
+  : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B32_TRAP
+  : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>  // 1D-array surface store of two data registers; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+                                intype:$r, intype:$g)),  // Inputs: $s, $idx, $x, then data $r and $g (same register class).
+                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+                []>;
+multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
-def SUST_B_2D_ARRAY_B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r),
-              "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int64Regs:$r),
-              "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-             "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g),
-             "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V2B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int64Regs:$r, Int64Regs:$g),
-             "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-      "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-      "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-     "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_2D_ARRAY_V4B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-     "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_1D_ARRAY_V2B8_CLAMP  // sust.b a1d .v2, .clamp group; each defm yields <name>_R and <name>_I.
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_CLAMP
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_CLAMP
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_CLAMP
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_TRAP  // sust.b a1d .v2, .trap group.
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_TRAP
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_TRAP
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_TRAP
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_ZERO  // sust.b a1d .v2, .zero group.
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_ZERO
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_ZERO
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_ZERO
+  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_V2B8_TRAP  // sust.p a1d .v2: only .trap entries, no b64.
+  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B16_TRAP
+  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B32_TRAP
+  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>  // 1D-array surface store of four data registers; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+                                intype:$r, intype:$g, intype:$b, intype:$a)),  // Inputs: $s, $idx, $x, then data $r/$g/$b/$a.
+                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
+                []>;
+multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
+defm SUST_B_1D_ARRAY_V4B8_CLAMP  // sust.b a1d .v4, .clamp group; the v4 lists stop at b32 (no b64 entry).
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_CLAMP
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_CLAMP
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_TRAP  // sust.b a1d .v4, .trap group.
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_TRAP
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_TRAP
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_ZERO  // sust.b a1d .v4, .zero group.
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_ZERO
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_ZERO
+  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_ARRAY_V4B8_TRAP  // sust.p a1d .v4: only .trap entries.
+  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B16_TRAP
+  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B32_TRAP
+  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
+
+class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>  // 2D surface store with one data register; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),  // Inputs: $s, coordinates $x and $y, then data $r.
+                inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
+                []>;
+multiclass SUST_2D<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
-def SUST_B_3D_B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r),
-              "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int64Regs:$r),
-              "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_B_3D_V2B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g),
-              "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V2B64_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int64Regs:$r, Int64Regs:$g),
-              "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_B_3D_V4B8_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-         "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-         "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_3D_V4B16_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-        "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_B_3D_V4B32_ZERO
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-        "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;  // sust.b 2D, .clamp group; each defm yields <name>_R and <name>_I.
+defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;  // b8 and b16 data both use Int16Regs.
+defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;  // sust.b 2D, .trap group.
+defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;  // sust.b 2D, .zero group.
+defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;  // sust.p 2D: only .trap entries, no b64.
+defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
+
+class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>  // 2D surface store of two data registers; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+                                intype:$r, intype:$g)),  // Inputs: $s, coordinates $x and $y, then data $r and $g.
+                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+                []>;
+multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
+defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;  // sust.b 2D .v2, .clamp group; each defm yields <name>_R and <name>_I.
+defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;  // sust.b 2D .v2, .trap group.
+defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;  // sust.b 2D .v2, .zero group.
+defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;  // sust.p 2D .v2: only .trap entries, no b64.
+defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
+
+class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>  // 2D surface store of four data registers; `surf` supplies $s.
+    : NVPTXInst<(outs),  // No output operands.
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+                                intype:$r, intype:$g, intype:$b, intype:$a)),  // Inputs: $s, $x, $y, then data $r/$g/$b/$a.
+                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
+                []>;
+multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {  // _R and _I differ only in the kind of $s.
+  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;  // Register surface operand.
+  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;  // Immediate surface operand.
+}
 
+defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
+    : NVPTXInst<(outs), // surface store: no results, inputs only
+                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+                                intype:$r)), // $idx precedes the x/y coordinates
+                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+                []>; // NOTE(review): asm repeats $y as a 4th coordinate, presumably padding
+multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
+  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
+  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
+} // _R: $s is an Int64Regs register; _I: $s is an i64imm immediate
 
-// Formatted
+defm SUST_B_2D_ARRAY_B8_CLAMP
+  : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_CLAMP
+  : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_CLAMP
+  : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_CLAMP
+  : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_TRAP
+  : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_TRAP
+  : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_TRAP
+  : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_TRAP
+  : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_ZERO
+  : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_ZERO
+  : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_ZERO
+  : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_ZERO
+  : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_B8_TRAP
+  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B16_TRAP
+  : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B32_TRAP
+  : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
+    : NVPTXInst<(outs), // surface store: no results, inputs only
+                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+                                intype:$r, intype:$g)), // two data operands
+                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
+                []>; // NOTE(review): asm repeats $y as a 4th coordinate, presumably padding
+multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
+  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
+  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
+} // _R: $s is an Int64Regs register; _I: $s is an i64imm immediate
 
-def SUST_P_1D_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_P_1D_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-              "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_P_1D_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-              "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
-              []>;
-def SUST_P_1D_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_1D_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-              "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_1D_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-              "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_1D_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_1D_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
-                   Int16Regs:$b, Int16Regs:$a),
-              "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_1D_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
-                   Int32Regs:$b, Int32Regs:$a),
-              "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_2D_ARRAY_V2B8_CLAMP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_CLAMP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_CLAMP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_CLAMP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_TRAP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_TRAP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_TRAP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_TRAP
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_ZERO
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_ZERO
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_ZERO
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_ZERO
+  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_V2B8_TRAP
+  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B16_TRAP
+  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B32_TRAP
+  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+    : NVPTXInst<(outs), // surface store: no results, inputs only
+                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+                                intype:$r, intype:$g, intype:$b, intype:$a)),
+                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
+                []>; // NOTE(review): asm repeats $y as a 4th coordinate, presumably padding
+multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+} // _R: $s is an Int64Regs register; _I: $s is an i64imm immediate
 
+defm SUST_B_2D_ARRAY_V4B8_CLAMP
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_CLAMP
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_CLAMP
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_TRAP
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_TRAP
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_TRAP
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_ZERO
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_ZERO
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_ZERO
+  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_ARRAY_V4B8_TRAP
+  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B16_TRAP
+  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B32_TRAP
+  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
+
+class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
+    : NVPTXInst<(outs), // surface store: no results, inputs only
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+                                intype:$r)), // single data operand
+                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+                []>; // NOTE(review): asm repeats $z as a 4th coordinate, presumably padding
+multiclass SUST_3D<string inst, NVPTXRegClass intype> {
+  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
+  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
+} // _R: $s is an Int64Regs register; _I: $s is an i64imm immediate
 
-def SUST_P_1D_ARRAY_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_P_1D_ARRAY_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
-              "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_P_1D_ARRAY_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
-              "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
-              []>;
-def SUST_P_1D_ARRAY_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_1D_ARRAY_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_1D_ARRAY_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_1D_ARRAY_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_1D_ARRAY_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_1D_ARRAY_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
+defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
+defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
+defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
+defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
+defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
+defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
+defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
+defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
+defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
+defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
+defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
+
+class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+    : NVPTXInst<(outs), // surface store: no results, inputs only
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+                                intype:$r, intype:$g)), // two data operands
+                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
+                []>; // NOTE(review): asm repeats $z as a 4th coordinate, presumably padding
+multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
+  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
+} // _R: $s is an Int64Regs register; _I: $s is an i64imm immediate
 
+defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
+defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
+defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
+defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
+defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
+defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
+defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
+defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
+defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
+
+class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+    : NVPTXInst<(outs), // surface store: no results, inputs only
+                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+                                intype:$r, intype:$g, intype:$b, intype:$a)),
+                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
+                []>; // NOTE(review): asm repeats $z as a 4th coordinate, presumably padding
+multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
+  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
+} // _R: $s is an Int64Regs register; _I: $s is an i64imm immediate
 
-def SUST_P_2D_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_P_2D_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-              "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_P_2D_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-              "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
-              []>;
-def SUST_P_2D_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_2D_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g),
-              "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_2D_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g),
-              "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
-              []>;
-def SUST_P_2D_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-              "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
-              "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_2D_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
-                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-             "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_2D_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
-                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-             "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
-             "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
 
+defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
+defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
+defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
 
-def SUST_P_2D_ARRAY_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_P_2D_ARRAY_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r),
-              "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_P_2D_ARRAY_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r),
-              "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
-              []>;
-def SUST_P_2D_ARRAY_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_P_2D_ARRAY_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g),
-             "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_P_2D_ARRAY_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g),
-             "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-             "\\{$r, $g\\};",
-              []>;
-def SUST_P_2D_ARRAY_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-      "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-      "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_2D_ARRAY_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-     "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_2D_ARRAY_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-     "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
-     "\\{$r, $g, $b, $a\\};",
-              []>;
+defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
+defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
+defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
 
+defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
+defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
+defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
 
-def SUST_P_3D_B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_P_3D_B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r),
-              "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_P_3D_B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r),
-              "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
-              []>;
-def SUST_P_3D_V2B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_P_3D_V2B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g),
-              "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_P_3D_V2B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g),
-              "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-              "\\{$r, $g\\};",
-              []>;
-def SUST_P_3D_V4B8_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-         "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-         "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_3D_V4B16_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-        "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
-def SUST_P_3D_V4B32_TRAP
-  : NVPTXInst<(outs),
-              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
-                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-        "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
-        "\\{$r, $g, $b, $a\\};",
-              []>;
 }
 
 // Surface store instruction patterns
@@ -6216,248 +4725,248 @@ def SUST_P_3D_V4B32_TRAP
 // .clamp variant
 def : Pat<(int_nvvm_sust_b_1d_i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-          (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+          (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i64_clamp
            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
-          (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+          (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
            Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
-          (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
-          (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_2d_i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i64_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-          (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-          (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
            Int32Regs:$g),
-          (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
            Int64Regs:$g),
-          (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -6466,77 +4975,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
 def : Pat<(int_nvvm_sust_b_3d_i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_B_3D_B8_CLAMP Int64Regs:$s,
+          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_B_3D_B16_CLAMP Int64Regs:$s,
+          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r),
-          (SUST_B_3D_B32_CLAMP Int64Regs:$s,
+          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i64_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r),
-          (SUST_B_3D_B64_CLAMP Int64Regs:$s,
+          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
+          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -6544,248 +5053,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
 // .trap variant
 def : Pat<(int_nvvm_sust_b_1d_i8_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i16_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-          (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i64_trap
            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
-          (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
            Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
-          (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
-          (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_2d_i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i64_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-          (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-          (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
            Int32Regs:$g),
-          (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
            Int64Regs:$g),
-          (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -6794,77 +5303,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
 def : Pat<(int_nvvm_sust_b_3d_i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_B_3D_B8_TRAP Int64Regs:$s,
+          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_B_3D_B16_TRAP Int64Regs:$s,
+          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r),
-          (SUST_B_3D_B32_TRAP Int64Regs:$s,
+          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i64_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r),
-          (SUST_B_3D_B64_TRAP Int64Regs:$s,
+          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
+          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
+          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
+          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
+          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
+          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
+          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
+          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -6872,248 +5381,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
 // .zero variant
 def : Pat<(int_nvvm_sust_b_1d_i8_zero
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i16_zero
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-          (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+          (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_i64_zero
            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
-          (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+          (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
            Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
            Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
+          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
-          (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
-          (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_2d_i8_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i16_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_i64_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-          (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
-          (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
+          (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
            Int32Regs:$g),
-          (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
            Int64Regs:$g),
-          (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
+          (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
+          (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
+          (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -7122,77 +5631,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
 def : Pat<(int_nvvm_sust_b_3d_i8_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_B_3D_B8_ZERO Int64Regs:$s,
+          (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i16_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_B_3D_B16_ZERO Int64Regs:$s,
+          (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r),
-          (SUST_B_3D_B32_ZERO Int64Regs:$s,
+          (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_i64_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r),
-          (SUST_B_3D_B64_ZERO Int64Regs:$s,
+          (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
+          (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
+          (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g),
-          (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
+          (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r, Int64Regs:$g),
-          (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
+          (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int64Regs:$r, Int64Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
+          (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
+          (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
+          (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -7201,207 +5710,207 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
 
 def : Pat<(int_nvvm_sust_p_1d_i8_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_1d_i16_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
-          (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+          (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_1d_i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
-          (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+          (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
            Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
            Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
            Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+          (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
-          (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
-          (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
-          (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+          (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_p_2d_i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_2d_i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_2d_i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
-          (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+          (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
 
 
 def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
+          (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
-          (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
+          (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
-          (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
+          (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
            Int32Regs:$g),
-          (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+          (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+          (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
            Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
            Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+          (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
            Int32Regs:$x, Int32Regs:$y,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 
@@ -7410,63 +5919,63 @@ def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
 def : Pat<(int_nvvm_sust_p_3d_i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_P_3D_B8_TRAP Int64Regs:$s,
+          (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_3d_i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r),
-          (SUST_P_3D_B16_TRAP Int64Regs:$s,
+          (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_3d_i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r),
-          (SUST_P_3D_B32_TRAP Int64Regs:$s,
+          (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r)>;
 
 def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
+          (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g),
-          (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
+          (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g),
-          (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
+          (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g)>;
 
 def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+          (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
-          (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+          (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
 
 def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
            Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
-          (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+          (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
            Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
            Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 8ae542130a14c..e404cead344b8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -41,7 +41,7 @@ class NVPTXReplaceImageHandles : public MachineFunctionPass {
   }
 private:
   bool processInstr(MachineInstr &MI);
-  void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+  bool replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
   bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
                           unsigned &Idx);
 };
@@ -76,19 +76,1675 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+static unsigned suldRegisterToIndexOpcode(unsigned RegOC) {
+  switch (RegOC) {
+  case NVPTX::SULD_1D_I8_CLAMP_R:
+    return NVPTX::SULD_1D_I8_CLAMP_I;
+  case NVPTX::SULD_1D_I16_CLAMP_R:
+    return NVPTX::SULD_1D_I16_CLAMP_I;
+  case NVPTX::SULD_1D_I32_CLAMP_R:
+    return NVPTX::SULD_1D_I32_CLAMP_I;
+  case NVPTX::SULD_1D_I64_CLAMP_R:
+    return NVPTX::SULD_1D_I64_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_I8_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_I8_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_I16_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_I16_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_I32_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_I32_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_I64_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_I64_CLAMP_I;
+  case NVPTX::SULD_2D_I8_CLAMP_R:
+    return NVPTX::SULD_2D_I8_CLAMP_I;
+  case NVPTX::SULD_2D_I16_CLAMP_R:
+    return NVPTX::SULD_2D_I16_CLAMP_I;
+  case NVPTX::SULD_2D_I32_CLAMP_R:
+    return NVPTX::SULD_2D_I32_CLAMP_I;
+  case NVPTX::SULD_2D_I64_CLAMP_R:
+    return NVPTX::SULD_2D_I64_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_I8_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_I8_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_I16_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_I16_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_I32_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_I32_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_I64_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_I64_CLAMP_I;
+  case NVPTX::SULD_3D_I8_CLAMP_R:
+    return NVPTX::SULD_3D_I8_CLAMP_I;
+  case NVPTX::SULD_3D_I16_CLAMP_R:
+    return NVPTX::SULD_3D_I16_CLAMP_I;
+  case NVPTX::SULD_3D_I32_CLAMP_R:
+    return NVPTX::SULD_3D_I32_CLAMP_I;
+  case NVPTX::SULD_3D_I64_CLAMP_R:
+    return NVPTX::SULD_3D_I64_CLAMP_I;
+  case NVPTX::SULD_1D_V2I8_CLAMP_R:
+    return NVPTX::SULD_1D_V2I8_CLAMP_I;
+  case NVPTX::SULD_1D_V2I16_CLAMP_R:
+    return NVPTX::SULD_1D_V2I16_CLAMP_I;
+  case NVPTX::SULD_1D_V2I32_CLAMP_R:
+    return NVPTX::SULD_1D_V2I32_CLAMP_I;
+  case NVPTX::SULD_1D_V2I64_CLAMP_R:
+    return NVPTX::SULD_1D_V2I64_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_I;
+  case NVPTX::SULD_2D_V2I8_CLAMP_R:
+    return NVPTX::SULD_2D_V2I8_CLAMP_I;
+  case NVPTX::SULD_2D_V2I16_CLAMP_R:
+    return NVPTX::SULD_2D_V2I16_CLAMP_I;
+  case NVPTX::SULD_2D_V2I32_CLAMP_R:
+    return NVPTX::SULD_2D_V2I32_CLAMP_I;
+  case NVPTX::SULD_2D_V2I64_CLAMP_R:
+    return NVPTX::SULD_2D_V2I64_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_I;
+  case NVPTX::SULD_3D_V2I8_CLAMP_R:
+    return NVPTX::SULD_3D_V2I8_CLAMP_I;
+  case NVPTX::SULD_3D_V2I16_CLAMP_R:
+    return NVPTX::SULD_3D_V2I16_CLAMP_I;
+  case NVPTX::SULD_3D_V2I32_CLAMP_R:
+    return NVPTX::SULD_3D_V2I32_CLAMP_I;
+  case NVPTX::SULD_3D_V2I64_CLAMP_R:
+    return NVPTX::SULD_3D_V2I64_CLAMP_I;
+  case NVPTX::SULD_1D_V4I8_CLAMP_R:
+    return NVPTX::SULD_1D_V4I8_CLAMP_I;
+  case NVPTX::SULD_1D_V4I16_CLAMP_R:
+    return NVPTX::SULD_1D_V4I16_CLAMP_I;
+  case NVPTX::SULD_1D_V4I32_CLAMP_R:
+    return NVPTX::SULD_1D_V4I32_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_I;
+  case NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R:
+    return NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_I;
+  case NVPTX::SULD_2D_V4I8_CLAMP_R:
+    return NVPTX::SULD_2D_V4I8_CLAMP_I;
+  case NVPTX::SULD_2D_V4I16_CLAMP_R:
+    return NVPTX::SULD_2D_V4I16_CLAMP_I;
+  case NVPTX::SULD_2D_V4I32_CLAMP_R:
+    return NVPTX::SULD_2D_V4I32_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_I;
+  case NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R:
+    return NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_I;
+  case NVPTX::SULD_3D_V4I8_CLAMP_R:
+    return NVPTX::SULD_3D_V4I8_CLAMP_I;
+  case NVPTX::SULD_3D_V4I16_CLAMP_R:
+    return NVPTX::SULD_3D_V4I16_CLAMP_I;
+  case NVPTX::SULD_3D_V4I32_CLAMP_R:
+    return NVPTX::SULD_3D_V4I32_CLAMP_I;
+  case NVPTX::SULD_1D_I8_TRAP_R:
+    return NVPTX::SULD_1D_I8_TRAP_I;
+  case NVPTX::SULD_1D_I16_TRAP_R:
+    return NVPTX::SULD_1D_I16_TRAP_I;
+  case NVPTX::SULD_1D_I32_TRAP_R:
+    return NVPTX::SULD_1D_I32_TRAP_I;
+  case NVPTX::SULD_1D_I64_TRAP_R:
+    return NVPTX::SULD_1D_I64_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_I8_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_I8_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_I16_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_I16_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_I32_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_I32_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_I64_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_I64_TRAP_I;
+  case NVPTX::SULD_2D_I8_TRAP_R:
+    return NVPTX::SULD_2D_I8_TRAP_I;
+  case NVPTX::SULD_2D_I16_TRAP_R:
+    return NVPTX::SULD_2D_I16_TRAP_I;
+  case NVPTX::SULD_2D_I32_TRAP_R:
+    return NVPTX::SULD_2D_I32_TRAP_I;
+  case NVPTX::SULD_2D_I64_TRAP_R:
+    return NVPTX::SULD_2D_I64_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_I8_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_I8_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_I16_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_I16_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_I32_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_I32_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_I64_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_I64_TRAP_I;
+  case NVPTX::SULD_3D_I8_TRAP_R:
+    return NVPTX::SULD_3D_I8_TRAP_I;
+  case NVPTX::SULD_3D_I16_TRAP_R:
+    return NVPTX::SULD_3D_I16_TRAP_I;
+  case NVPTX::SULD_3D_I32_TRAP_R:
+    return NVPTX::SULD_3D_I32_TRAP_I;
+  case NVPTX::SULD_3D_I64_TRAP_R:
+    return NVPTX::SULD_3D_I64_TRAP_I;
+  case NVPTX::SULD_1D_V2I8_TRAP_R:
+    return NVPTX::SULD_1D_V2I8_TRAP_I;
+  case NVPTX::SULD_1D_V2I16_TRAP_R:
+    return NVPTX::SULD_1D_V2I16_TRAP_I;
+  case NVPTX::SULD_1D_V2I32_TRAP_R:
+    return NVPTX::SULD_1D_V2I32_TRAP_I;
+  case NVPTX::SULD_1D_V2I64_TRAP_R:
+    return NVPTX::SULD_1D_V2I64_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I8_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I16_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I32_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V2I64_TRAP_I;
+  case NVPTX::SULD_2D_V2I8_TRAP_R:
+    return NVPTX::SULD_2D_V2I8_TRAP_I;
+  case NVPTX::SULD_2D_V2I16_TRAP_R:
+    return NVPTX::SULD_2D_V2I16_TRAP_I;
+  case NVPTX::SULD_2D_V2I32_TRAP_R:
+    return NVPTX::SULD_2D_V2I32_TRAP_I;
+  case NVPTX::SULD_2D_V2I64_TRAP_R:
+    return NVPTX::SULD_2D_V2I64_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I8_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I16_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I32_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V2I64_TRAP_I;
+  case NVPTX::SULD_3D_V2I8_TRAP_R:
+    return NVPTX::SULD_3D_V2I8_TRAP_I;
+  case NVPTX::SULD_3D_V2I16_TRAP_R:
+    return NVPTX::SULD_3D_V2I16_TRAP_I;
+  case NVPTX::SULD_3D_V2I32_TRAP_R:
+    return NVPTX::SULD_3D_V2I32_TRAP_I;
+  case NVPTX::SULD_3D_V2I64_TRAP_R:
+    return NVPTX::SULD_3D_V2I64_TRAP_I;
+  case NVPTX::SULD_1D_V4I8_TRAP_R:
+    return NVPTX::SULD_1D_V4I8_TRAP_I;
+  case NVPTX::SULD_1D_V4I16_TRAP_R:
+    return NVPTX::SULD_1D_V4I16_TRAP_I;
+  case NVPTX::SULD_1D_V4I32_TRAP_R:
+    return NVPTX::SULD_1D_V4I32_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V4I8_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V4I16_TRAP_I;
+  case NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R:
+    return NVPTX::SULD_1D_ARRAY_V4I32_TRAP_I;
+  case NVPTX::SULD_2D_V4I8_TRAP_R:
+    return NVPTX::SULD_2D_V4I8_TRAP_I;
+  case NVPTX::SULD_2D_V4I16_TRAP_R:
+    return NVPTX::SULD_2D_V4I16_TRAP_I;
+  case NVPTX::SULD_2D_V4I32_TRAP_R:
+    return NVPTX::SULD_2D_V4I32_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V4I8_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V4I16_TRAP_I;
+  case NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R:
+    return NVPTX::SULD_2D_ARRAY_V4I32_TRAP_I;
+  case NVPTX::SULD_3D_V4I8_TRAP_R:
+    return NVPTX::SULD_3D_V4I8_TRAP_I;
+  case NVPTX::SULD_3D_V4I16_TRAP_R:
+    return NVPTX::SULD_3D_V4I16_TRAP_I;
+  case NVPTX::SULD_3D_V4I32_TRAP_R:
+    return NVPTX::SULD_3D_V4I32_TRAP_I;
+  case NVPTX::SULD_1D_I8_ZERO_R:
+    return NVPTX::SULD_1D_I8_ZERO_I;
+  case NVPTX::SULD_1D_I16_ZERO_R:
+    return NVPTX::SULD_1D_I16_ZERO_I;
+  case NVPTX::SULD_1D_I32_ZERO_R:
+    return NVPTX::SULD_1D_I32_ZERO_I;
+  case NVPTX::SULD_1D_I64_ZERO_R:
+    return NVPTX::SULD_1D_I64_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_I8_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_I8_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_I16_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_I16_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_I32_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_I32_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_I64_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_I64_ZERO_I;
+  case NVPTX::SULD_2D_I8_ZERO_R:
+    return NVPTX::SULD_2D_I8_ZERO_I;
+  case NVPTX::SULD_2D_I16_ZERO_R:
+    return NVPTX::SULD_2D_I16_ZERO_I;
+  case NVPTX::SULD_2D_I32_ZERO_R:
+    return NVPTX::SULD_2D_I32_ZERO_I;
+  case NVPTX::SULD_2D_I64_ZERO_R:
+    return NVPTX::SULD_2D_I64_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_I8_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_I8_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_I16_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_I16_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_I32_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_I32_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_I64_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_I64_ZERO_I;
+  case NVPTX::SULD_3D_I8_ZERO_R:
+    return NVPTX::SULD_3D_I8_ZERO_I;
+  case NVPTX::SULD_3D_I16_ZERO_R:
+    return NVPTX::SULD_3D_I16_ZERO_I;
+  case NVPTX::SULD_3D_I32_ZERO_R:
+    return NVPTX::SULD_3D_I32_ZERO_I;
+  case NVPTX::SULD_3D_I64_ZERO_R:
+    return NVPTX::SULD_3D_I64_ZERO_I;
+  case NVPTX::SULD_1D_V2I8_ZERO_R:
+    return NVPTX::SULD_1D_V2I8_ZERO_I;
+  case NVPTX::SULD_1D_V2I16_ZERO_R:
+    return NVPTX::SULD_1D_V2I16_ZERO_I;
+  case NVPTX::SULD_1D_V2I32_ZERO_R:
+    return NVPTX::SULD_1D_V2I32_ZERO_I;
+  case NVPTX::SULD_1D_V2I64_ZERO_R:
+    return NVPTX::SULD_1D_V2I64_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V2I8_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V2I16_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V2I32_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V2I64_ZERO_I;
+  case NVPTX::SULD_2D_V2I8_ZERO_R:
+    return NVPTX::SULD_2D_V2I8_ZERO_I;
+  case NVPTX::SULD_2D_V2I16_ZERO_R:
+    return NVPTX::SULD_2D_V2I16_ZERO_I;
+  case NVPTX::SULD_2D_V2I32_ZERO_R:
+    return NVPTX::SULD_2D_V2I32_ZERO_I;
+  case NVPTX::SULD_2D_V2I64_ZERO_R:
+    return NVPTX::SULD_2D_V2I64_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V2I8_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V2I16_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V2I32_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V2I64_ZERO_I;
+  case NVPTX::SULD_3D_V2I8_ZERO_R:
+    return NVPTX::SULD_3D_V2I8_ZERO_I;
+  case NVPTX::SULD_3D_V2I16_ZERO_R:
+    return NVPTX::SULD_3D_V2I16_ZERO_I;
+  case NVPTX::SULD_3D_V2I32_ZERO_R:
+    return NVPTX::SULD_3D_V2I32_ZERO_I;
+  case NVPTX::SULD_3D_V2I64_ZERO_R:
+    return NVPTX::SULD_3D_V2I64_ZERO_I;
+  case NVPTX::SULD_1D_V4I8_ZERO_R:
+    return NVPTX::SULD_1D_V4I8_ZERO_I;
+  case NVPTX::SULD_1D_V4I16_ZERO_R:
+    return NVPTX::SULD_1D_V4I16_ZERO_I;
+  case NVPTX::SULD_1D_V4I32_ZERO_R:
+    return NVPTX::SULD_1D_V4I32_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V4I8_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V4I16_ZERO_I;
+  case NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R:
+    return NVPTX::SULD_1D_ARRAY_V4I32_ZERO_I;
+  case NVPTX::SULD_2D_V4I8_ZERO_R:
+    return NVPTX::SULD_2D_V4I8_ZERO_I;
+  case NVPTX::SULD_2D_V4I16_ZERO_R:
+    return NVPTX::SULD_2D_V4I16_ZERO_I;
+  case NVPTX::SULD_2D_V4I32_ZERO_R:
+    return NVPTX::SULD_2D_V4I32_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V4I8_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V4I16_ZERO_I;
+  case NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R:
+    return NVPTX::SULD_2D_ARRAY_V4I32_ZERO_I;
+  case NVPTX::SULD_3D_V4I8_ZERO_R:
+    return NVPTX::SULD_3D_V4I8_ZERO_I;
+  case NVPTX::SULD_3D_V4I16_ZERO_R:
+    return NVPTX::SULD_3D_V4I16_ZERO_I;
+  case NVPTX::SULD_3D_V4I32_ZERO_R:
+    return NVPTX::SULD_3D_V4I32_ZERO_I;
+  default:
+    llvm_unreachable("Unhandled SULD opcode");
+  }
+}
+
+static unsigned sustRegisterToIndexOpcode(unsigned RegOC) {
+  switch (RegOC) {
+  case NVPTX::SUST_B_1D_B8_CLAMP_R:
+    return NVPTX::SUST_B_1D_B8_CLAMP_I;
+  case NVPTX::SUST_B_1D_B16_CLAMP_R:
+    return NVPTX::SUST_B_1D_B16_CLAMP_I;
+  case NVPTX::SUST_B_1D_B32_CLAMP_R:
+    return NVPTX::SUST_B_1D_B32_CLAMP_I;
+  case NVPTX::SUST_B_1D_B64_CLAMP_R:
+    return NVPTX::SUST_B_1D_B64_CLAMP_I;
+  case NVPTX::SUST_B_1D_V2B8_CLAMP_R:
+    return NVPTX::SUST_B_1D_V2B8_CLAMP_I;
+  case NVPTX::SUST_B_1D_V2B16_CLAMP_R:
+    return NVPTX::SUST_B_1D_V2B16_CLAMP_I;
+  case NVPTX::SUST_B_1D_V2B32_CLAMP_R:
+    return NVPTX::SUST_B_1D_V2B32_CLAMP_I;
+  case NVPTX::SUST_B_1D_V2B64_CLAMP_R:
+    return NVPTX::SUST_B_1D_V2B64_CLAMP_I;
+  case NVPTX::SUST_B_1D_V4B8_CLAMP_R:
+    return NVPTX::SUST_B_1D_V4B8_CLAMP_I;
+  case NVPTX::SUST_B_1D_V4B16_CLAMP_R:
+    return NVPTX::SUST_B_1D_V4B16_CLAMP_I;
+  case NVPTX::SUST_B_1D_V4B32_CLAMP_R:
+    return NVPTX::SUST_B_1D_V4B32_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_I;
+  case NVPTX::SUST_B_2D_B8_CLAMP_R:
+    return NVPTX::SUST_B_2D_B8_CLAMP_I;
+  case NVPTX::SUST_B_2D_B16_CLAMP_R:
+    return NVPTX::SUST_B_2D_B16_CLAMP_I;
+  case NVPTX::SUST_B_2D_B32_CLAMP_R:
+    return NVPTX::SUST_B_2D_B32_CLAMP_I;
+  case NVPTX::SUST_B_2D_B64_CLAMP_R:
+    return NVPTX::SUST_B_2D_B64_CLAMP_I;
+  case NVPTX::SUST_B_2D_V2B8_CLAMP_R:
+    return NVPTX::SUST_B_2D_V2B8_CLAMP_I;
+  case NVPTX::SUST_B_2D_V2B16_CLAMP_R:
+    return NVPTX::SUST_B_2D_V2B16_CLAMP_I;
+  case NVPTX::SUST_B_2D_V2B32_CLAMP_R:
+    return NVPTX::SUST_B_2D_V2B32_CLAMP_I;
+  case NVPTX::SUST_B_2D_V2B64_CLAMP_R:
+    return NVPTX::SUST_B_2D_V2B64_CLAMP_I;
+  case NVPTX::SUST_B_2D_V4B8_CLAMP_R:
+    return NVPTX::SUST_B_2D_V4B8_CLAMP_I;
+  case NVPTX::SUST_B_2D_V4B16_CLAMP_R:
+    return NVPTX::SUST_B_2D_V4B16_CLAMP_I;
+  case NVPTX::SUST_B_2D_V4B32_CLAMP_R:
+    return NVPTX::SUST_B_2D_V4B32_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_I;
+  case NVPTX::SUST_B_3D_B8_CLAMP_R:
+    return NVPTX::SUST_B_3D_B8_CLAMP_I;
+  case NVPTX::SUST_B_3D_B16_CLAMP_R:
+    return NVPTX::SUST_B_3D_B16_CLAMP_I;
+  case NVPTX::SUST_B_3D_B32_CLAMP_R:
+    return NVPTX::SUST_B_3D_B32_CLAMP_I;
+  case NVPTX::SUST_B_3D_B64_CLAMP_R:
+    return NVPTX::SUST_B_3D_B64_CLAMP_I;
+  case NVPTX::SUST_B_3D_V2B8_CLAMP_R:
+    return NVPTX::SUST_B_3D_V2B8_CLAMP_I;
+  case NVPTX::SUST_B_3D_V2B16_CLAMP_R:
+    return NVPTX::SUST_B_3D_V2B16_CLAMP_I;
+  case NVPTX::SUST_B_3D_V2B32_CLAMP_R:
+    return NVPTX::SUST_B_3D_V2B32_CLAMP_I;
+  case NVPTX::SUST_B_3D_V2B64_CLAMP_R:
+    return NVPTX::SUST_B_3D_V2B64_CLAMP_I;
+  case NVPTX::SUST_B_3D_V4B8_CLAMP_R:
+    return NVPTX::SUST_B_3D_V4B8_CLAMP_I;
+  case NVPTX::SUST_B_3D_V4B16_CLAMP_R:
+    return NVPTX::SUST_B_3D_V4B16_CLAMP_I;
+  case NVPTX::SUST_B_3D_V4B32_CLAMP_R:
+    return NVPTX::SUST_B_3D_V4B32_CLAMP_I;
+  case NVPTX::SUST_B_1D_B8_TRAP_R:
+    return NVPTX::SUST_B_1D_B8_TRAP_I;
+  case NVPTX::SUST_B_1D_B16_TRAP_R:
+    return NVPTX::SUST_B_1D_B16_TRAP_I;
+  case NVPTX::SUST_B_1D_B32_TRAP_R:
+    return NVPTX::SUST_B_1D_B32_TRAP_I;
+  case NVPTX::SUST_B_1D_B64_TRAP_R:
+    return NVPTX::SUST_B_1D_B64_TRAP_I;
+  case NVPTX::SUST_B_1D_V2B8_TRAP_R:
+    return NVPTX::SUST_B_1D_V2B8_TRAP_I;
+  case NVPTX::SUST_B_1D_V2B16_TRAP_R:
+    return NVPTX::SUST_B_1D_V2B16_TRAP_I;
+  case NVPTX::SUST_B_1D_V2B32_TRAP_R:
+    return NVPTX::SUST_B_1D_V2B32_TRAP_I;
+  case NVPTX::SUST_B_1D_V2B64_TRAP_R:
+    return NVPTX::SUST_B_1D_V2B64_TRAP_I;
+  case NVPTX::SUST_B_1D_V4B8_TRAP_R:
+    return NVPTX::SUST_B_1D_V4B8_TRAP_I;
+  case NVPTX::SUST_B_1D_V4B16_TRAP_R:
+    return NVPTX::SUST_B_1D_V4B16_TRAP_I;
+  case NVPTX::SUST_B_1D_V4B32_TRAP_R:
+    return NVPTX::SUST_B_1D_V4B32_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B8_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B8_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B16_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B16_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B32_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B32_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_B64_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_B64_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_I;
+  case NVPTX::SUST_B_2D_B8_TRAP_R:
+    return NVPTX::SUST_B_2D_B8_TRAP_I;
+  case NVPTX::SUST_B_2D_B16_TRAP_R:
+    return NVPTX::SUST_B_2D_B16_TRAP_I;
+  case NVPTX::SUST_B_2D_B32_TRAP_R:
+    return NVPTX::SUST_B_2D_B32_TRAP_I;
+  case NVPTX::SUST_B_2D_B64_TRAP_R:
+    return NVPTX::SUST_B_2D_B64_TRAP_I;
+  case NVPTX::SUST_B_2D_V2B8_TRAP_R:
+    return NVPTX::SUST_B_2D_V2B8_TRAP_I;
+  case NVPTX::SUST_B_2D_V2B16_TRAP_R:
+    return NVPTX::SUST_B_2D_V2B16_TRAP_I;
+  case NVPTX::SUST_B_2D_V2B32_TRAP_R:
+    return NVPTX::SUST_B_2D_V2B32_TRAP_I;
+  case NVPTX::SUST_B_2D_V2B64_TRAP_R:
+    return NVPTX::SUST_B_2D_V2B64_TRAP_I;
+  case NVPTX::SUST_B_2D_V4B8_TRAP_R:
+    return NVPTX::SUST_B_2D_V4B8_TRAP_I;
+  case NVPTX::SUST_B_2D_V4B16_TRAP_R:
+    return NVPTX::SUST_B_2D_V4B16_TRAP_I;
+  case NVPTX::SUST_B_2D_V4B32_TRAP_R:
+    return NVPTX::SUST_B_2D_V4B32_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B8_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B8_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B16_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B16_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B32_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B32_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_B64_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_B64_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_I;
+  case NVPTX::SUST_B_3D_B8_TRAP_R:
+    return NVPTX::SUST_B_3D_B8_TRAP_I;
+  case NVPTX::SUST_B_3D_B16_TRAP_R:
+    return NVPTX::SUST_B_3D_B16_TRAP_I;
+  case NVPTX::SUST_B_3D_B32_TRAP_R:
+    return NVPTX::SUST_B_3D_B32_TRAP_I;
+  case NVPTX::SUST_B_3D_B64_TRAP_R:
+    return NVPTX::SUST_B_3D_B64_TRAP_I;
+  case NVPTX::SUST_B_3D_V2B8_TRAP_R:
+    return NVPTX::SUST_B_3D_V2B8_TRAP_I;
+  case NVPTX::SUST_B_3D_V2B16_TRAP_R:
+    return NVPTX::SUST_B_3D_V2B16_TRAP_I;
+  case NVPTX::SUST_B_3D_V2B32_TRAP_R:
+    return NVPTX::SUST_B_3D_V2B32_TRAP_I;
+  case NVPTX::SUST_B_3D_V2B64_TRAP_R:
+    return NVPTX::SUST_B_3D_V2B64_TRAP_I;
+  case NVPTX::SUST_B_3D_V4B8_TRAP_R:
+    return NVPTX::SUST_B_3D_V4B8_TRAP_I;
+  case NVPTX::SUST_B_3D_V4B16_TRAP_R:
+    return NVPTX::SUST_B_3D_V4B16_TRAP_I;
+  case NVPTX::SUST_B_3D_V4B32_TRAP_R:
+    return NVPTX::SUST_B_3D_V4B32_TRAP_I;
+  case NVPTX::SUST_B_1D_B8_ZERO_R:
+    return NVPTX::SUST_B_1D_B8_ZERO_I;
+  case NVPTX::SUST_B_1D_B16_ZERO_R:
+    return NVPTX::SUST_B_1D_B16_ZERO_I;
+  case NVPTX::SUST_B_1D_B32_ZERO_R:
+    return NVPTX::SUST_B_1D_B32_ZERO_I;
+  case NVPTX::SUST_B_1D_B64_ZERO_R:
+    return NVPTX::SUST_B_1D_B64_ZERO_I;
+  case NVPTX::SUST_B_1D_V2B8_ZERO_R:
+    return NVPTX::SUST_B_1D_V2B8_ZERO_I;
+  case NVPTX::SUST_B_1D_V2B16_ZERO_R:
+    return NVPTX::SUST_B_1D_V2B16_ZERO_I;
+  case NVPTX::SUST_B_1D_V2B32_ZERO_R:
+    return NVPTX::SUST_B_1D_V2B32_ZERO_I;
+  case NVPTX::SUST_B_1D_V2B64_ZERO_R:
+    return NVPTX::SUST_B_1D_V2B64_ZERO_I;
+  case NVPTX::SUST_B_1D_V4B8_ZERO_R:
+    return NVPTX::SUST_B_1D_V4B8_ZERO_I;
+  case NVPTX::SUST_B_1D_V4B16_ZERO_R:
+    return NVPTX::SUST_B_1D_V4B16_ZERO_I;
+  case NVPTX::SUST_B_1D_V4B32_ZERO_R:
+    return NVPTX::SUST_B_1D_V4B32_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_B8_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_B8_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_B16_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_B16_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_B32_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_B32_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_B64_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_B64_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_I;
+  case NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_R:
+    return NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_I;
+  case NVPTX::SUST_B_2D_B8_ZERO_R:
+    return NVPTX::SUST_B_2D_B8_ZERO_I;
+  case NVPTX::SUST_B_2D_B16_ZERO_R:
+    return NVPTX::SUST_B_2D_B16_ZERO_I;
+  case NVPTX::SUST_B_2D_B32_ZERO_R:
+    return NVPTX::SUST_B_2D_B32_ZERO_I;
+  case NVPTX::SUST_B_2D_B64_ZERO_R:
+    return NVPTX::SUST_B_2D_B64_ZERO_I;
+  case NVPTX::SUST_B_2D_V2B8_ZERO_R:
+    return NVPTX::SUST_B_2D_V2B8_ZERO_I;
+  case NVPTX::SUST_B_2D_V2B16_ZERO_R:
+    return NVPTX::SUST_B_2D_V2B16_ZERO_I;
+  case NVPTX::SUST_B_2D_V2B32_ZERO_R:
+    return NVPTX::SUST_B_2D_V2B32_ZERO_I;
+  case NVPTX::SUST_B_2D_V2B64_ZERO_R:
+    return NVPTX::SUST_B_2D_V2B64_ZERO_I;
+  case NVPTX::SUST_B_2D_V4B8_ZERO_R:
+    return NVPTX::SUST_B_2D_V4B8_ZERO_I;
+  case NVPTX::SUST_B_2D_V4B16_ZERO_R:
+    return NVPTX::SUST_B_2D_V4B16_ZERO_I;
+  case NVPTX::SUST_B_2D_V4B32_ZERO_R:
+    return NVPTX::SUST_B_2D_V4B32_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_B8_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_B8_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_B16_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_B16_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_B32_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_B32_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_B64_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_B64_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_I;
+  case NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_R:
+    return NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_I;
+  case NVPTX::SUST_B_3D_B8_ZERO_R:
+    return NVPTX::SUST_B_3D_B8_ZERO_I;
+  case NVPTX::SUST_B_3D_B16_ZERO_R:
+    return NVPTX::SUST_B_3D_B16_ZERO_I;
+  case NVPTX::SUST_B_3D_B32_ZERO_R:
+    return NVPTX::SUST_B_3D_B32_ZERO_I;
+  case NVPTX::SUST_B_3D_B64_ZERO_R:
+    return NVPTX::SUST_B_3D_B64_ZERO_I;
+  case NVPTX::SUST_B_3D_V2B8_ZERO_R:
+    return NVPTX::SUST_B_3D_V2B8_ZERO_I;
+  case NVPTX::SUST_B_3D_V2B16_ZERO_R:
+    return NVPTX::SUST_B_3D_V2B16_ZERO_I;
+  case NVPTX::SUST_B_3D_V2B32_ZERO_R:
+    return NVPTX::SUST_B_3D_V2B32_ZERO_I;
+  case NVPTX::SUST_B_3D_V2B64_ZERO_R:
+    return NVPTX::SUST_B_3D_V2B64_ZERO_I;
+  case NVPTX::SUST_B_3D_V4B8_ZERO_R:
+    return NVPTX::SUST_B_3D_V4B8_ZERO_I;
+  case NVPTX::SUST_B_3D_V4B16_ZERO_R:
+    return NVPTX::SUST_B_3D_V4B16_ZERO_I;
+  case NVPTX::SUST_B_3D_V4B32_ZERO_R:
+    return NVPTX::SUST_B_3D_V4B32_ZERO_I;
+  case NVPTX::SUST_P_1D_B8_TRAP_R:
+    return NVPTX::SUST_P_1D_B8_TRAP_I;
+  case NVPTX::SUST_P_1D_B16_TRAP_R:
+    return NVPTX::SUST_P_1D_B16_TRAP_I;
+  case NVPTX::SUST_P_1D_B32_TRAP_R:
+    return NVPTX::SUST_P_1D_B32_TRAP_I;
+  case NVPTX::SUST_P_1D_V2B8_TRAP_R:
+    return NVPTX::SUST_P_1D_V2B8_TRAP_I;
+  case NVPTX::SUST_P_1D_V2B16_TRAP_R:
+    return NVPTX::SUST_P_1D_V2B16_TRAP_I;
+  case NVPTX::SUST_P_1D_V2B32_TRAP_R:
+    return NVPTX::SUST_P_1D_V2B32_TRAP_I;
+  case NVPTX::SUST_P_1D_V4B8_TRAP_R:
+    return NVPTX::SUST_P_1D_V4B8_TRAP_I;
+  case NVPTX::SUST_P_1D_V4B16_TRAP_R:
+    return NVPTX::SUST_P_1D_V4B16_TRAP_I;
+  case NVPTX::SUST_P_1D_V4B32_TRAP_R:
+    return NVPTX::SUST_P_1D_V4B32_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_B8_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_B8_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_B16_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_B16_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_B32_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_B32_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_I;
+  case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_R:
+    return NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_I;
+  case NVPTX::SUST_P_2D_B8_TRAP_R:
+    return NVPTX::SUST_P_2D_B8_TRAP_I;
+  case NVPTX::SUST_P_2D_B16_TRAP_R:
+    return NVPTX::SUST_P_2D_B16_TRAP_I;
+  case NVPTX::SUST_P_2D_B32_TRAP_R:
+    return NVPTX::SUST_P_2D_B32_TRAP_I;
+  case NVPTX::SUST_P_2D_V2B8_TRAP_R:
+    return NVPTX::SUST_P_2D_V2B8_TRAP_I;
+  case NVPTX::SUST_P_2D_V2B16_TRAP_R:
+    return NVPTX::SUST_P_2D_V2B16_TRAP_I;
+  case NVPTX::SUST_P_2D_V2B32_TRAP_R:
+    return NVPTX::SUST_P_2D_V2B32_TRAP_I;
+  case NVPTX::SUST_P_2D_V4B8_TRAP_R:
+    return NVPTX::SUST_P_2D_V4B8_TRAP_I;
+  case NVPTX::SUST_P_2D_V4B16_TRAP_R:
+    return NVPTX::SUST_P_2D_V4B16_TRAP_I;
+  case NVPTX::SUST_P_2D_V4B32_TRAP_R:
+    return NVPTX::SUST_P_2D_V4B32_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_B8_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_B8_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_B16_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_B16_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_B32_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_B32_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_I;
+  case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_R:
+    return NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_I;
+  case NVPTX::SUST_P_3D_B8_TRAP_R:
+    return NVPTX::SUST_P_3D_B8_TRAP_I;
+  case NVPTX::SUST_P_3D_B16_TRAP_R:
+    return NVPTX::SUST_P_3D_B16_TRAP_I;
+  case NVPTX::SUST_P_3D_B32_TRAP_R:
+    return NVPTX::SUST_P_3D_B32_TRAP_I;
+  case NVPTX::SUST_P_3D_V2B8_TRAP_R:
+    return NVPTX::SUST_P_3D_V2B8_TRAP_I;
+  case NVPTX::SUST_P_3D_V2B16_TRAP_R:
+    return NVPTX::SUST_P_3D_V2B16_TRAP_I;
+  case NVPTX::SUST_P_3D_V2B32_TRAP_R:
+    return NVPTX::SUST_P_3D_V2B32_TRAP_I;
+  case NVPTX::SUST_P_3D_V4B8_TRAP_R:
+    return NVPTX::SUST_P_3D_V4B8_TRAP_I;
+  case NVPTX::SUST_P_3D_V4B16_TRAP_R:
+    return NVPTX::SUST_P_3D_V4B16_TRAP_I;
+  case NVPTX::SUST_P_3D_V4B32_TRAP_R:
+    return NVPTX::SUST_P_3D_V4B32_TRAP_I;
+  default:
+    llvm_unreachable("Unhandled SUST opcode");
+  }
+}
+
+/// Translate a texture instruction opcode of the "register" form into the
+/// opcode of the matching "index" (immediate) form.
+///
+/// Two naming schemes are handled:
+///  * opcodes with a two-letter suffix: only the first letter is switched
+///    from R to I and the second letter is kept (_RR -> _IR, _RI -> _II);
+///  * TEX_UNIFIED_* / TLD4_UNIFIED_* opcodes with a one-letter suffix
+///    (_R -> _I).
+/// NOTE(review): the first suffix letter presumably describes the texture
+/// handle operand and the second one the sampler operand - confirm against
+/// the TableGen definitions.
+///
+/// \param RegOC opcode to translate; must be one of the cases listed below.
+/// \returns the opcode whose suffix has been switched as described above.
+///
+/// Any opcode that is not listed reaches llvm_unreachable.
+static unsigned texRegisterToIndexOpcode(unsigned RegOC) {
+  switch (RegOC) {
+  // Two-letter suffix: _RR -> _IR, _RI -> _II.
+  case NVPTX::TEX_1D_F32_S32_RR:
+    return NVPTX::TEX_1D_F32_S32_IR;
+  case NVPTX::TEX_1D_F32_S32_RI:
+    return NVPTX::TEX_1D_F32_S32_II;
+  case NVPTX::TEX_1D_F32_F32_RR:
+    return NVPTX::TEX_1D_F32_F32_IR;
+  case NVPTX::TEX_1D_F32_F32_RI:
+    return NVPTX::TEX_1D_F32_F32_II;
+  case NVPTX::TEX_1D_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_1D_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_1D_F32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_F32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_F32_F32_GRAD_IR;
+  case NVPTX::TEX_1D_F32_F32_GRAD_RI:
+    return NVPTX::TEX_1D_F32_F32_GRAD_II;
+  case NVPTX::TEX_1D_S32_S32_RR:
+    return NVPTX::TEX_1D_S32_S32_IR;
+  case NVPTX::TEX_1D_S32_S32_RI:
+    return NVPTX::TEX_1D_S32_S32_II;
+  case NVPTX::TEX_1D_S32_F32_RR:
+    return NVPTX::TEX_1D_S32_F32_IR;
+  case NVPTX::TEX_1D_S32_F32_RI:
+    return NVPTX::TEX_1D_S32_F32_II;
+  case NVPTX::TEX_1D_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_1D_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_1D_S32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_S32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_S32_F32_GRAD_IR;
+  case NVPTX::TEX_1D_S32_F32_GRAD_RI:
+    return NVPTX::TEX_1D_S32_F32_GRAD_II;
+  case NVPTX::TEX_1D_U32_S32_RR:
+    return NVPTX::TEX_1D_U32_S32_IR;
+  case NVPTX::TEX_1D_U32_S32_RI:
+    return NVPTX::TEX_1D_U32_S32_II;
+  case NVPTX::TEX_1D_U32_F32_RR:
+    return NVPTX::TEX_1D_U32_F32_IR;
+  case NVPTX::TEX_1D_U32_F32_RI:
+    return NVPTX::TEX_1D_U32_F32_II;
+  case NVPTX::TEX_1D_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_1D_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_1D_U32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_U32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_U32_F32_GRAD_IR;
+  case NVPTX::TEX_1D_U32_F32_GRAD_RI:
+    return NVPTX::TEX_1D_U32_F32_GRAD_II;
+  case NVPTX::TEX_1D_ARRAY_F32_S32_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_S32_IR;
+  case NVPTX::TEX_1D_ARRAY_F32_S32_RI:
+    return NVPTX::TEX_1D_ARRAY_F32_S32_II;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_IR;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_RI:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_II;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II;
+  case NVPTX::TEX_1D_ARRAY_S32_S32_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_S32_IR;
+  case NVPTX::TEX_1D_ARRAY_S32_S32_RI:
+    return NVPTX::TEX_1D_ARRAY_S32_S32_II;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_IR;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_RI:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_II;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II;
+  case NVPTX::TEX_1D_ARRAY_U32_S32_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_S32_IR;
+  case NVPTX::TEX_1D_ARRAY_U32_S32_RI:
+    return NVPTX::TEX_1D_ARRAY_U32_S32_II;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_IR;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_RI:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_II;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II;
+  case NVPTX::TEX_2D_F32_S32_RR:
+    return NVPTX::TEX_2D_F32_S32_IR;
+  case NVPTX::TEX_2D_F32_S32_RI:
+    return NVPTX::TEX_2D_F32_S32_II;
+  case NVPTX::TEX_2D_F32_F32_RR:
+    return NVPTX::TEX_2D_F32_F32_IR;
+  case NVPTX::TEX_2D_F32_F32_RI:
+    return NVPTX::TEX_2D_F32_F32_II;
+  case NVPTX::TEX_2D_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_2D_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_2D_F32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_F32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_F32_F32_GRAD_IR;
+  case NVPTX::TEX_2D_F32_F32_GRAD_RI:
+    return NVPTX::TEX_2D_F32_F32_GRAD_II;
+  case NVPTX::TEX_2D_S32_S32_RR:
+    return NVPTX::TEX_2D_S32_S32_IR;
+  case NVPTX::TEX_2D_S32_S32_RI:
+    return NVPTX::TEX_2D_S32_S32_II;
+  case NVPTX::TEX_2D_S32_F32_RR:
+    return NVPTX::TEX_2D_S32_F32_IR;
+  case NVPTX::TEX_2D_S32_F32_RI:
+    return NVPTX::TEX_2D_S32_F32_II;
+  case NVPTX::TEX_2D_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_2D_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_2D_S32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_S32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_S32_F32_GRAD_IR;
+  case NVPTX::TEX_2D_S32_F32_GRAD_RI:
+    return NVPTX::TEX_2D_S32_F32_GRAD_II;
+  case NVPTX::TEX_2D_U32_S32_RR:
+    return NVPTX::TEX_2D_U32_S32_IR;
+  case NVPTX::TEX_2D_U32_S32_RI:
+    return NVPTX::TEX_2D_U32_S32_II;
+  case NVPTX::TEX_2D_U32_F32_RR:
+    return NVPTX::TEX_2D_U32_F32_IR;
+  case NVPTX::TEX_2D_U32_F32_RI:
+    return NVPTX::TEX_2D_U32_F32_II;
+  case NVPTX::TEX_2D_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_2D_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_2D_U32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_U32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_U32_F32_GRAD_IR;
+  case NVPTX::TEX_2D_U32_F32_GRAD_RI:
+    return NVPTX::TEX_2D_U32_F32_GRAD_II;
+  case NVPTX::TEX_2D_ARRAY_F32_S32_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_S32_IR;
+  case NVPTX::TEX_2D_ARRAY_F32_S32_RI:
+    return NVPTX::TEX_2D_ARRAY_F32_S32_II;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_IR;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_RI:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_II;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II;
+  case NVPTX::TEX_2D_ARRAY_S32_S32_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_S32_IR;
+  case NVPTX::TEX_2D_ARRAY_S32_S32_RI:
+    return NVPTX::TEX_2D_ARRAY_S32_S32_II;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_IR;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_RI:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_II;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II;
+  case NVPTX::TEX_2D_ARRAY_U32_S32_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_S32_IR;
+  case NVPTX::TEX_2D_ARRAY_U32_S32_RI:
+    return NVPTX::TEX_2D_ARRAY_U32_S32_II;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_IR;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_RI:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_II;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II;
+  case NVPTX::TEX_3D_F32_S32_RR:
+    return NVPTX::TEX_3D_F32_S32_IR;
+  case NVPTX::TEX_3D_F32_S32_RI:
+    return NVPTX::TEX_3D_F32_S32_II;
+  case NVPTX::TEX_3D_F32_F32_RR:
+    return NVPTX::TEX_3D_F32_F32_IR;
+  case NVPTX::TEX_3D_F32_F32_RI:
+    return NVPTX::TEX_3D_F32_F32_II;
+  case NVPTX::TEX_3D_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_3D_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_3D_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_3D_F32_F32_LEVEL_II;
+  case NVPTX::TEX_3D_F32_F32_GRAD_RR:
+    return NVPTX::TEX_3D_F32_F32_GRAD_IR;
+  case NVPTX::TEX_3D_F32_F32_GRAD_RI:
+    return NVPTX::TEX_3D_F32_F32_GRAD_II;
+  case NVPTX::TEX_3D_S32_S32_RR:
+    return NVPTX::TEX_3D_S32_S32_IR;
+  case NVPTX::TEX_3D_S32_S32_RI:
+    return NVPTX::TEX_3D_S32_S32_II;
+  case NVPTX::TEX_3D_S32_F32_RR:
+    return NVPTX::TEX_3D_S32_F32_IR;
+  case NVPTX::TEX_3D_S32_F32_RI:
+    return NVPTX::TEX_3D_S32_F32_II;
+  case NVPTX::TEX_3D_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_3D_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_3D_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_3D_S32_F32_LEVEL_II;
+  case NVPTX::TEX_3D_S32_F32_GRAD_RR:
+    return NVPTX::TEX_3D_S32_F32_GRAD_IR;
+  case NVPTX::TEX_3D_S32_F32_GRAD_RI:
+    return NVPTX::TEX_3D_S32_F32_GRAD_II;
+  case NVPTX::TEX_3D_U32_S32_RR:
+    return NVPTX::TEX_3D_U32_S32_IR;
+  case NVPTX::TEX_3D_U32_S32_RI:
+    return NVPTX::TEX_3D_U32_S32_II;
+  case NVPTX::TEX_3D_U32_F32_RR:
+    return NVPTX::TEX_3D_U32_F32_IR;
+  case NVPTX::TEX_3D_U32_F32_RI:
+    return NVPTX::TEX_3D_U32_F32_II;
+  case NVPTX::TEX_3D_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_3D_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_3D_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_3D_U32_F32_LEVEL_II;
+  case NVPTX::TEX_3D_U32_F32_GRAD_RR:
+    return NVPTX::TEX_3D_U32_F32_GRAD_IR;
+  case NVPTX::TEX_3D_U32_F32_GRAD_RI:
+    return NVPTX::TEX_3D_U32_F32_GRAD_II;
+  case NVPTX::TEX_CUBE_F32_F32_RR:
+    return NVPTX::TEX_CUBE_F32_F32_IR;
+  case NVPTX::TEX_CUBE_F32_F32_RI:
+    return NVPTX::TEX_CUBE_F32_F32_II;
+  case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_CUBE_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_CUBE_F32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_S32_F32_RR:
+    return NVPTX::TEX_CUBE_S32_F32_IR;
+  case NVPTX::TEX_CUBE_S32_F32_RI:
+    return NVPTX::TEX_CUBE_S32_F32_II;
+  case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_CUBE_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_CUBE_S32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_U32_F32_RR:
+    return NVPTX::TEX_CUBE_U32_F32_IR;
+  case NVPTX::TEX_CUBE_U32_F32_RI:
+    return NVPTX::TEX_CUBE_U32_F32_II;
+  case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_CUBE_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_CUBE_U32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_IR;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_RI:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_II;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_IR;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_RI:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_II;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_IR;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_RI:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_II;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II;
+  case NVPTX::TLD4_R_2D_F32_F32_RR:
+    return NVPTX::TLD4_R_2D_F32_F32_IR;
+  case NVPTX::TLD4_R_2D_F32_F32_RI:
+    return NVPTX::TLD4_R_2D_F32_F32_II;
+  case NVPTX::TLD4_G_2D_F32_F32_RR:
+    return NVPTX::TLD4_G_2D_F32_F32_IR;
+  case NVPTX::TLD4_G_2D_F32_F32_RI:
+    return NVPTX::TLD4_G_2D_F32_F32_II;
+  case NVPTX::TLD4_B_2D_F32_F32_RR:
+    return NVPTX::TLD4_B_2D_F32_F32_IR;
+  case NVPTX::TLD4_B_2D_F32_F32_RI:
+    return NVPTX::TLD4_B_2D_F32_F32_II;
+  case NVPTX::TLD4_A_2D_F32_F32_RR:
+    return NVPTX::TLD4_A_2D_F32_F32_IR;
+  case NVPTX::TLD4_A_2D_F32_F32_RI:
+    return NVPTX::TLD4_A_2D_F32_F32_II;
+  case NVPTX::TLD4_R_2D_S32_F32_RR:
+    return NVPTX::TLD4_R_2D_S32_F32_IR;
+  case NVPTX::TLD4_R_2D_S32_F32_RI:
+    return NVPTX::TLD4_R_2D_S32_F32_II;
+  case NVPTX::TLD4_G_2D_S32_F32_RR:
+    return NVPTX::TLD4_G_2D_S32_F32_IR;
+  case NVPTX::TLD4_G_2D_S32_F32_RI:
+    return NVPTX::TLD4_G_2D_S32_F32_II;
+  case NVPTX::TLD4_B_2D_S32_F32_RR:
+    return NVPTX::TLD4_B_2D_S32_F32_IR;
+  case NVPTX::TLD4_B_2D_S32_F32_RI:
+    return NVPTX::TLD4_B_2D_S32_F32_II;
+  case NVPTX::TLD4_A_2D_S32_F32_RR:
+    return NVPTX::TLD4_A_2D_S32_F32_IR;
+  case NVPTX::TLD4_A_2D_S32_F32_RI:
+    return NVPTX::TLD4_A_2D_S32_F32_II;
+  case NVPTX::TLD4_R_2D_U32_F32_RR:
+    return NVPTX::TLD4_R_2D_U32_F32_IR;
+  case NVPTX::TLD4_R_2D_U32_F32_RI:
+    return NVPTX::TLD4_R_2D_U32_F32_II;
+  case NVPTX::TLD4_G_2D_U32_F32_RR:
+    return NVPTX::TLD4_G_2D_U32_F32_IR;
+  case NVPTX::TLD4_G_2D_U32_F32_RI:
+    return NVPTX::TLD4_G_2D_U32_F32_II;
+  case NVPTX::TLD4_B_2D_U32_F32_RR:
+    return NVPTX::TLD4_B_2D_U32_F32_IR;
+  case NVPTX::TLD4_B_2D_U32_F32_RI:
+    return NVPTX::TLD4_B_2D_U32_F32_II;
+  case NVPTX::TLD4_A_2D_U32_F32_RR:
+    return NVPTX::TLD4_A_2D_U32_F32_IR;
+  case NVPTX::TLD4_A_2D_U32_F32_RI:
+    return NVPTX::TLD4_A_2D_U32_F32_II;
+  // One-letter suffix (TEX_UNIFIED_* / TLD4_UNIFIED_*): _R -> _I.
+  case NVPTX::TEX_UNIFIED_1D_F32_S32_R:
+    return NVPTX::TEX_UNIFIED_1D_F32_S32_I;
+  case NVPTX::TEX_UNIFIED_1D_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_1D_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_1D_S32_S32_R:
+    return NVPTX::TEX_UNIFIED_1D_S32_S32_I;
+  case NVPTX::TEX_UNIFIED_1D_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_1D_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_1D_U32_S32_R:
+    return NVPTX::TEX_UNIFIED_1D_U32_S32_I;
+  case NVPTX::TEX_UNIFIED_1D_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_1D_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_2D_F32_S32_R:
+    return NVPTX::TEX_UNIFIED_2D_F32_S32_I;
+  case NVPTX::TEX_UNIFIED_2D_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_2D_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_2D_S32_S32_R:
+    return NVPTX::TEX_UNIFIED_2D_S32_S32_I;
+  case NVPTX::TEX_UNIFIED_2D_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_2D_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_2D_U32_S32_R:
+    return NVPTX::TEX_UNIFIED_2D_U32_S32_I;
+  case NVPTX::TEX_UNIFIED_2D_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_2D_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_3D_F32_S32_R:
+    return NVPTX::TEX_UNIFIED_3D_F32_S32_I;
+  case NVPTX::TEX_UNIFIED_3D_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_3D_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_3D_S32_S32_R:
+    return NVPTX::TEX_UNIFIED_3D_S32_S32_I;
+  case NVPTX::TEX_UNIFIED_3D_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_3D_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_3D_U32_S32_R:
+    return NVPTX::TEX_UNIFIED_3D_U32_S32_I;
+  case NVPTX::TEX_UNIFIED_3D_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_3D_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R:
+    return NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_I;
+  case NVPTX::TEX_UNIFIED_CUBE_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_CUBE_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_CUBE_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_CUBE_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_CUBE_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_CUBE_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R:
+    return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_I;
+  case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R:
+    return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_I;
+  case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_I;
+  case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R:
+    return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I;
+  case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R:
+    return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I;
+  case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R:
+    return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I;
+  case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R:
+    return NVPTX::TLD4_UNIFIED_G_2D_F32_F32_I;
+  case NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R:
+    return NVPTX::TLD4_UNIFIED_B_2D_F32_F32_I;
+  case NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R:
+    return NVPTX::TLD4_UNIFIED_A_2D_F32_F32_I;
+  case NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R:
+    return NVPTX::TLD4_UNIFIED_R_2D_S32_F32_I;
+  case NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R:
+    return NVPTX::TLD4_UNIFIED_G_2D_S32_F32_I;
+  case NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R:
+    return NVPTX::TLD4_UNIFIED_B_2D_S32_F32_I;
+  case NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R:
+    return NVPTX::TLD4_UNIFIED_A_2D_S32_F32_I;
+  case NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R:
+    return NVPTX::TLD4_UNIFIED_R_2D_U32_F32_I;
+  case NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R:
+    return NVPTX::TLD4_UNIFIED_G_2D_U32_F32_I;
+  case NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R:
+    return NVPTX::TLD4_UNIFIED_B_2D_U32_F32_I;
+  case NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R:
+    return NVPTX::TLD4_UNIFIED_A_2D_U32_F32_I;
+  default:
+    llvm_unreachable("Unhandled TEX opcode");
+  }
+}
+
+static unsigned samplerRegisterToIndexOpcode(unsigned RegOC) {
+  switch (RegOC) {
+  case NVPTX::TEX_1D_F32_S32_RR:
+    return NVPTX::TEX_1D_F32_S32_RI;
+  case NVPTX::TEX_1D_F32_S32_IR:
+    return NVPTX::TEX_1D_F32_S32_II;
+  case NVPTX::TEX_1D_F32_F32_RR:
+    return NVPTX::TEX_1D_F32_F32_RI;
+  case NVPTX::TEX_1D_F32_F32_IR:
+    return NVPTX::TEX_1D_F32_F32_II;
+  case NVPTX::TEX_1D_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_1D_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_1D_F32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_F32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_F32_F32_GRAD_RI;
+  case NVPTX::TEX_1D_F32_F32_GRAD_IR:
+    return NVPTX::TEX_1D_F32_F32_GRAD_II;
+  case NVPTX::TEX_1D_S32_S32_RR:
+    return NVPTX::TEX_1D_S32_S32_RI;
+  case NVPTX::TEX_1D_S32_S32_IR:
+    return NVPTX::TEX_1D_S32_S32_II;
+  case NVPTX::TEX_1D_S32_F32_RR:
+    return NVPTX::TEX_1D_S32_F32_RI;
+  case NVPTX::TEX_1D_S32_F32_IR:
+    return NVPTX::TEX_1D_S32_F32_II;
+  case NVPTX::TEX_1D_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_1D_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_1D_S32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_S32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_S32_F32_GRAD_RI;
+  case NVPTX::TEX_1D_S32_F32_GRAD_IR:
+    return NVPTX::TEX_1D_S32_F32_GRAD_II;
+  case NVPTX::TEX_1D_U32_S32_RR:
+    return NVPTX::TEX_1D_U32_S32_RI;
+  case NVPTX::TEX_1D_U32_S32_IR:
+    return NVPTX::TEX_1D_U32_S32_II;
+  case NVPTX::TEX_1D_U32_F32_RR:
+    return NVPTX::TEX_1D_U32_F32_RI;
+  case NVPTX::TEX_1D_U32_F32_IR:
+    return NVPTX::TEX_1D_U32_F32_II;
+  case NVPTX::TEX_1D_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_1D_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_1D_U32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_U32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_U32_F32_GRAD_RI;
+  case NVPTX::TEX_1D_U32_F32_GRAD_IR:
+    return NVPTX::TEX_1D_U32_F32_GRAD_II;
+  case NVPTX::TEX_1D_ARRAY_F32_S32_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_S32_RI;
+  case NVPTX::TEX_1D_ARRAY_F32_S32_IR:
+    return NVPTX::TEX_1D_ARRAY_F32_S32_II;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_RI;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_IR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_II;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI;
+  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR:
+    return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II;
+  case NVPTX::TEX_1D_ARRAY_S32_S32_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_S32_RI;
+  case NVPTX::TEX_1D_ARRAY_S32_S32_IR:
+    return NVPTX::TEX_1D_ARRAY_S32_S32_II;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_RI;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_IR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_II;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI;
+  case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR:
+    return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II;
+  case NVPTX::TEX_1D_ARRAY_U32_S32_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_S32_RI;
+  case NVPTX::TEX_1D_ARRAY_U32_S32_IR:
+    return NVPTX::TEX_1D_ARRAY_U32_S32_II;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_RI;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_IR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_II;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI;
+  case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR:
+    return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II;
+  case NVPTX::TEX_2D_F32_S32_RR:
+    return NVPTX::TEX_2D_F32_S32_RI;
+  case NVPTX::TEX_2D_F32_S32_IR:
+    return NVPTX::TEX_2D_F32_S32_II;
+  case NVPTX::TEX_2D_F32_F32_RR:
+    return NVPTX::TEX_2D_F32_F32_RI;
+  case NVPTX::TEX_2D_F32_F32_IR:
+    return NVPTX::TEX_2D_F32_F32_II;
+  case NVPTX::TEX_2D_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_2D_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_2D_F32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_F32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_F32_F32_GRAD_RI;
+  case NVPTX::TEX_2D_F32_F32_GRAD_IR:
+    return NVPTX::TEX_2D_F32_F32_GRAD_II;
+  case NVPTX::TEX_2D_S32_S32_RR:
+    return NVPTX::TEX_2D_S32_S32_RI;
+  case NVPTX::TEX_2D_S32_S32_IR:
+    return NVPTX::TEX_2D_S32_S32_II;
+  case NVPTX::TEX_2D_S32_F32_RR:
+    return NVPTX::TEX_2D_S32_F32_RI;
+  case NVPTX::TEX_2D_S32_F32_IR:
+    return NVPTX::TEX_2D_S32_F32_II;
+  case NVPTX::TEX_2D_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_2D_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_2D_S32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_S32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_S32_F32_GRAD_RI;
+  case NVPTX::TEX_2D_S32_F32_GRAD_IR:
+    return NVPTX::TEX_2D_S32_F32_GRAD_II;
+  case NVPTX::TEX_2D_U32_S32_RR:
+    return NVPTX::TEX_2D_U32_S32_RI;
+  case NVPTX::TEX_2D_U32_S32_IR:
+    return NVPTX::TEX_2D_U32_S32_II;
+  case NVPTX::TEX_2D_U32_F32_RR:
+    return NVPTX::TEX_2D_U32_F32_RI;
+  case NVPTX::TEX_2D_U32_F32_IR:
+    return NVPTX::TEX_2D_U32_F32_II;
+  case NVPTX::TEX_2D_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_2D_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_2D_U32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_U32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_U32_F32_GRAD_RI;
+  case NVPTX::TEX_2D_U32_F32_GRAD_IR:
+    return NVPTX::TEX_2D_U32_F32_GRAD_II;
+  case NVPTX::TEX_2D_ARRAY_F32_S32_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_S32_RI;
+  case NVPTX::TEX_2D_ARRAY_F32_S32_IR:
+    return NVPTX::TEX_2D_ARRAY_F32_S32_II;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_RI;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_IR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_II;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI;
+  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR:
+    return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II;
+  case NVPTX::TEX_2D_ARRAY_S32_S32_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_S32_RI;
+  case NVPTX::TEX_2D_ARRAY_S32_S32_IR:
+    return NVPTX::TEX_2D_ARRAY_S32_S32_II;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_RI;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_IR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_II;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI;
+  case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR:
+    return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II;
+  case NVPTX::TEX_2D_ARRAY_U32_S32_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_S32_RI;
+  case NVPTX::TEX_2D_ARRAY_U32_S32_IR:
+    return NVPTX::TEX_2D_ARRAY_U32_S32_II;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_RI;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_IR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_II;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI;
+  case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR:
+    return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II;
+  case NVPTX::TEX_3D_F32_S32_RR:
+    return NVPTX::TEX_3D_F32_S32_RI;
+  case NVPTX::TEX_3D_F32_S32_IR:
+    return NVPTX::TEX_3D_F32_S32_II;
+  case NVPTX::TEX_3D_F32_F32_RR:
+    return NVPTX::TEX_3D_F32_F32_RI;
+  case NVPTX::TEX_3D_F32_F32_IR:
+    return NVPTX::TEX_3D_F32_F32_II;
+  case NVPTX::TEX_3D_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_3D_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_3D_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_3D_F32_F32_LEVEL_II;
+  case NVPTX::TEX_3D_F32_F32_GRAD_RR:
+    return NVPTX::TEX_3D_F32_F32_GRAD_RI;
+  case NVPTX::TEX_3D_F32_F32_GRAD_IR:
+    return NVPTX::TEX_3D_F32_F32_GRAD_II;
+  case NVPTX::TEX_3D_S32_S32_RR:
+    return NVPTX::TEX_3D_S32_S32_RI;
+  case NVPTX::TEX_3D_S32_S32_IR:
+    return NVPTX::TEX_3D_S32_S32_II;
+  case NVPTX::TEX_3D_S32_F32_RR:
+    return NVPTX::TEX_3D_S32_F32_RI;
+  case NVPTX::TEX_3D_S32_F32_IR:
+    return NVPTX::TEX_3D_S32_F32_II;
+  case NVPTX::TEX_3D_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_3D_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_3D_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_3D_S32_F32_LEVEL_II;
+  case NVPTX::TEX_3D_S32_F32_GRAD_RR:
+    return NVPTX::TEX_3D_S32_F32_GRAD_RI;
+  case NVPTX::TEX_3D_S32_F32_GRAD_IR:
+    return NVPTX::TEX_3D_S32_F32_GRAD_II;
+  case NVPTX::TEX_3D_U32_S32_RR:
+    return NVPTX::TEX_3D_U32_S32_RI;
+  case NVPTX::TEX_3D_U32_S32_IR:
+    return NVPTX::TEX_3D_U32_S32_II;
+  case NVPTX::TEX_3D_U32_F32_RR:
+    return NVPTX::TEX_3D_U32_F32_RI;
+  case NVPTX::TEX_3D_U32_F32_IR:
+    return NVPTX::TEX_3D_U32_F32_II;
+  case NVPTX::TEX_3D_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_3D_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_3D_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_3D_U32_F32_LEVEL_II;
+  case NVPTX::TEX_3D_U32_F32_GRAD_RR:
+    return NVPTX::TEX_3D_U32_F32_GRAD_RI;
+  case NVPTX::TEX_3D_U32_F32_GRAD_IR:
+    return NVPTX::TEX_3D_U32_F32_GRAD_II;
+  case NVPTX::TEX_CUBE_F32_F32_RR:
+    return NVPTX::TEX_CUBE_F32_F32_RI;
+  case NVPTX::TEX_CUBE_F32_F32_IR:
+    return NVPTX::TEX_CUBE_F32_F32_II;
+  case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_CUBE_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_CUBE_F32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_S32_F32_RR:
+    return NVPTX::TEX_CUBE_S32_F32_RI;
+  case NVPTX::TEX_CUBE_S32_F32_IR:
+    return NVPTX::TEX_CUBE_S32_F32_II;
+  case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_CUBE_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_CUBE_S32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_U32_F32_RR:
+    return NVPTX::TEX_CUBE_U32_F32_RI;
+  case NVPTX::TEX_CUBE_U32_F32_IR:
+    return NVPTX::TEX_CUBE_U32_F32_II;
+  case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_CUBE_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_CUBE_U32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_RI;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_IR:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_II;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI;
+  case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR:
+    return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_RI;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_IR:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_II;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI;
+  case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR:
+    return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_RI;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_IR:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_II;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI;
+  case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR:
+    return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II;
+  case NVPTX::TLD4_R_2D_F32_F32_RR:
+    return NVPTX::TLD4_R_2D_F32_F32_RI;
+  case NVPTX::TLD4_R_2D_F32_F32_IR:
+    return NVPTX::TLD4_R_2D_F32_F32_II;
+  case NVPTX::TLD4_G_2D_F32_F32_RR:
+    return NVPTX::TLD4_G_2D_F32_F32_RI;
+  case NVPTX::TLD4_G_2D_F32_F32_IR:
+    return NVPTX::TLD4_G_2D_F32_F32_II;
+  case NVPTX::TLD4_B_2D_F32_F32_RR:
+    return NVPTX::TLD4_B_2D_F32_F32_RI;
+  case NVPTX::TLD4_B_2D_F32_F32_IR:
+    return NVPTX::TLD4_B_2D_F32_F32_II;
+  case NVPTX::TLD4_A_2D_F32_F32_RR:
+    return NVPTX::TLD4_A_2D_F32_F32_RI;
+  case NVPTX::TLD4_A_2D_F32_F32_IR:
+    return NVPTX::TLD4_A_2D_F32_F32_II;
+  case NVPTX::TLD4_R_2D_S32_F32_RR:
+    return NVPTX::TLD4_R_2D_S32_F32_RI;
+  case NVPTX::TLD4_R_2D_S32_F32_IR:
+    return NVPTX::TLD4_R_2D_S32_F32_II;
+  case NVPTX::TLD4_G_2D_S32_F32_RR:
+    return NVPTX::TLD4_G_2D_S32_F32_RI;
+  case NVPTX::TLD4_G_2D_S32_F32_IR:
+    return NVPTX::TLD4_G_2D_S32_F32_II;
+  case NVPTX::TLD4_B_2D_S32_F32_RR:
+    return NVPTX::TLD4_B_2D_S32_F32_RI;
+  case NVPTX::TLD4_B_2D_S32_F32_IR:
+    return NVPTX::TLD4_B_2D_S32_F32_II;
+  case NVPTX::TLD4_A_2D_S32_F32_RR:
+    return NVPTX::TLD4_A_2D_S32_F32_RI;
+  case NVPTX::TLD4_A_2D_S32_F32_IR:
+    return NVPTX::TLD4_A_2D_S32_F32_II;
+  case NVPTX::TLD4_R_2D_U32_F32_RR:
+    return NVPTX::TLD4_R_2D_U32_F32_RI;
+  case NVPTX::TLD4_R_2D_U32_F32_IR:
+    return NVPTX::TLD4_R_2D_U32_F32_II;
+  case NVPTX::TLD4_G_2D_U32_F32_RR:
+    return NVPTX::TLD4_G_2D_U32_F32_RI;
+  case NVPTX::TLD4_G_2D_U32_F32_IR:
+    return NVPTX::TLD4_G_2D_U32_F32_II;
+  case NVPTX::TLD4_B_2D_U32_F32_RR:
+    return NVPTX::TLD4_B_2D_U32_F32_RI;
+  case NVPTX::TLD4_B_2D_U32_F32_IR:
+    return NVPTX::TLD4_B_2D_U32_F32_II;
+  case NVPTX::TLD4_A_2D_U32_F32_RR:
+    return NVPTX::TLD4_A_2D_U32_F32_RI;
+  case NVPTX::TLD4_A_2D_U32_F32_IR:
+    return NVPTX::TLD4_A_2D_U32_F32_II;
+  default:
+    llvm_unreachable("Unhandled TEX opcode");
+  };
+}
+
+static unsigned queryRegisterToIndexOpcode(unsigned RegOC) { // Maps a TXQ_*/SUQ_* opcode ending in _R to the same-named opcode ending in _I.
+  switch (RegOC) { // One case per supported _R opcode; each returns its _I twin.
+  case NVPTX::TXQ_CHANNEL_ORDER_R: // TXQ_* opcodes
+    return NVPTX::TXQ_CHANNEL_ORDER_I;
+  case NVPTX::TXQ_CHANNEL_DATA_TYPE_R:
+    return NVPTX::TXQ_CHANNEL_DATA_TYPE_I;
+  case NVPTX::TXQ_WIDTH_R:
+    return NVPTX::TXQ_WIDTH_I;
+  case NVPTX::TXQ_HEIGHT_R:
+    return NVPTX::TXQ_HEIGHT_I;
+  case NVPTX::TXQ_DEPTH_R:
+    return NVPTX::TXQ_DEPTH_I;
+  case NVPTX::TXQ_ARRAY_SIZE_R:
+    return NVPTX::TXQ_ARRAY_SIZE_I;
+  case NVPTX::TXQ_NUM_SAMPLES_R:
+    return NVPTX::TXQ_NUM_SAMPLES_I;
+  case NVPTX::TXQ_NUM_MIPMAP_LEVELS_R:
+    return NVPTX::TXQ_NUM_MIPMAP_LEVELS_I;
+  case NVPTX::SUQ_CHANNEL_ORDER_R: // SUQ_* opcodes
+    return NVPTX::SUQ_CHANNEL_ORDER_I;
+  case NVPTX::SUQ_CHANNEL_DATA_TYPE_R:
+    return NVPTX::SUQ_CHANNEL_DATA_TYPE_I;
+  case NVPTX::SUQ_WIDTH_R:
+    return NVPTX::SUQ_WIDTH_I;
+  case NVPTX::SUQ_HEIGHT_R:
+    return NVPTX::SUQ_HEIGHT_I;
+  case NVPTX::SUQ_DEPTH_R:
+    return NVPTX::SUQ_DEPTH_I;
+  case NVPTX::SUQ_ARRAY_SIZE_R:
+    return NVPTX::SUQ_ARRAY_SIZE_I;
+  default: // Any opcode not listed above is reported through llvm_unreachable.
+    llvm_unreachable("Unhandled TXQ/SUQ opcode");
+  };
+}
+
 bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
   MachineFunction &MF = *MI.getParent()->getParent();
   const MCInstrDesc &MCID = MI.getDesc();
+  const NVPTXInstrInfo *TII = MF.getSubtarget<NVPTXSubtarget>().getInstrInfo();
 
   if (MCID.TSFlags & NVPTXII::IsTexFlag) {
     // This is a texture fetch, so operand 4 is a texref and operand 5 is
     // a samplerref
     MachineOperand &TexHandle = MI.getOperand(4);
-    replaceImageHandle(TexHandle, MF);
+    if (replaceImageHandle(TexHandle, MF))
+      MI.setDesc(TII->get(texRegisterToIndexOpcode(MI.getOpcode())));
 
     if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
       MachineOperand &SampHandle = MI.getOperand(5);
-      replaceImageHandle(SampHandle, MF);
+      if (replaceImageHandle(SampHandle, MF))
+        MI.setDesc(TII->get(samplerRegisterToIndexOpcode(MI.getOpcode())));
     }
 
     return true;
@@ -99,21 +1755,24 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
     // For a surface load of vector size N, the Nth operand will be the surfref
     MachineOperand &SurfHandle = MI.getOperand(VecSize);
 
-    replaceImageHandle(SurfHandle, MF);
+    if (replaceImageHandle(SurfHandle, MF))
+      MI.setDesc(TII->get(suldRegisterToIndexOpcode(MI.getOpcode())));
 
     return true;
   } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
     // This is a surface store, so operand 0 is a surfref
     MachineOperand &SurfHandle = MI.getOperand(0);
 
-    replaceImageHandle(SurfHandle, MF);
+    if (replaceImageHandle(SurfHandle, MF))
+      MI.setDesc(TII->get(sustRegisterToIndexOpcode(MI.getOpcode())));
 
     return true;
   } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
     // This is a query, so operand 1 is a surfref/texref
     MachineOperand &Handle = MI.getOperand(1);
 
-    replaceImageHandle(Handle, MF);
+    if (replaceImageHandle(Handle, MF))
+      MI.setDesc(TII->get(queryRegisterToIndexOpcode(MI.getOpcode())));
 
     return true;
   }
@@ -121,12 +1780,14 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
   return false;
 }
 
-void NVPTXReplaceImageHandles::
-replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op,
+                                                  MachineFunction &MF) { // Returns true iff Op was changed to an immediate.
   unsigned Idx;
   if (findIndexForHandle(Op, MF, Idx)) {
     Op.ChangeToImmediate(Idx);
+    return true; // Op now holds Idx; processInstr uses this result to switch the instruction's opcode.
   }
+  return false; // findIndexForHandle returned false, so Op is left unchanged.
 }
 
 bool NVPTXReplaceImageHandles::

diff  --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
index c17c71e01d3e3..fee61951bb56d 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 
 target triple = "nvptx-unknown-cuda"
 

diff  --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll
index 7383722a35961..9eeb1c0fc70a4 100644
--- a/llvm/test/CodeGen/NVPTX/surf-read.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-read.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
 
 target triple = "nvptx-unknown-nvcl"
 

diff  --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
index da55a242bba6e..6c77616e86721 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 
 target triple = "nvptx-unknown-cuda"
 

diff  --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll
index 5098d2ae9e1c6..d5180e6c5d468 100644
--- a/llvm/test/CodeGen/NVPTX/surf-write.ll
+++ b/llvm/test/CodeGen/NVPTX/surf-write.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
 
 target triple = "nvptx-unknown-nvcl"
 

diff  --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index d5f7c1667f17b..09d2ff0967378 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 
 
 target triple = "nvptx-unknown-cuda"

diff  --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll
index 6e0fda69e4f5f..8638a42dc0eff 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
 
 target triple = "nvptx-unknown-nvcl"
 

diff  --git a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
index e56eb5dea18f6..203d0973b7cd8 100644
--- a/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/llvm/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
-; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
+; RUN: llc < %s -march=nvptx -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
 
 target triple = "nvptx-unknown-cuda"
 


        


More information about the llvm-commits mailing list