[llvm] 3bc7ffd - [AMDGPU] Use v3f32 type in image instructions

Wed Feb 5 01:35:58 PST 2020

Author: Sebastian Neubauer
Date: 2020-02-05T10:35:41+01:00
New Revision: 3bc7ffdaabee8b8380dab66f7e1e200660723b10

URL: https://github.com/llvm/llvm-project/commit/3bc7ffdaabee8b8380dab66f7e1e200660723b10
DIFF: https://github.com/llvm/llvm-project/commit/3bc7ffdaabee8b8380dab66f7e1e200660723b10.diff

LOG: [AMDGPU] Use v3f32 type in image instructions

This should lower the amount of used registers for gfx9.

I updated some of the changed tests with the update script because
changing them by hand is tedious.

Differential Revision: https://reviews.llvm.org/D73884

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 98654c180ff8..935e98f36241 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5119,6 +5119,9 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
   } else if (Elts.size() == 2) {
     Type = MVT::v2f32;
     NumElts = 2;
+  } else if (Elts.size() == 3) {
+    Type = MVT::v3f32;
+    NumElts = 3;
   } else if (Elts.size() <= 4) {
     Type = MVT::v4f32;
     NumElts = 4;

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
index 51471769fafd..9e6be563c383 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -35,8 +35,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}image_load_mip_v4f16:
-; UNPACKED: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
-; PACKED: image_load_mip v[0:1], v[0:3], s[0:7] dmask:0xf unorm d16{{$}}
+; UNPACKED: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
+; PACKED: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
 ; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
 define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
 main_body:
@@ -46,8 +46,8 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}image_load_3d_v2f16:
-; UNPACKED: image_load v[0:1], v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
-; PACKED: image_load v0, v[0:3], s[0:7] dmask:0x3 unorm d16{{$}}
+; UNPACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
+; PACKED: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
 ; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
 define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
index c57c27d0ccfe..e7d2fb202961 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -1,34 +1,122 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,GFX8910,SIVI,PRT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789,PRT %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900,NOPRT %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-
-; GCN-LABEL: {{^}}load_1d:
-; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FIJI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-prt-strict-null -verify-machineinstrs < %s | FileCheck -check-prefixes=NOPRT %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s
+
 define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_tfe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm tfe{{$}}
-; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; VERDE-LABEL: load_1d_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_tfe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_tfe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s9 ; encoding: [0x09,0x02,0x0c,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v5, s8 ; encoding: [0x08,0x02,0x0a,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[5:6], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x05,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -37,22 +125,82 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_1d_lwe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
-; GFX10: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; VERDE-LABEL: load_1d_lwe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf unorm lwe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_lwe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf unorm lwe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_lwe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf unorm lwe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_lwe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v0, s[0:7] dmask:0xf unorm lwe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_lwe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s9 ; encoding: [0x09,0x02,0x0c,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v5, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; encoding: [0x00,0x1f,0x02,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v5, s8 ; encoding: [0x08,0x02,0x0a,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[5:6], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x05,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -61,31 +209,122 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_2d:
-; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
 define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; VERDE-LABEL: load_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2d_tfe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
+; VERDE-LABEL: load_2d_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2d_tfe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2d_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2d_tfe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:1], s[0:7] dmask:0xf unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2d_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, s9 ; encoding: [0x09,0x02,0x0e,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s8 ; encoding: [0x08,0x02,0x0c,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -94,31 +333,126 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_3d:
-; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
 define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; VERDE-LABEL: load_3d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_3d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_3d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_3d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_3d_tfe_lwe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
+; VERDE-LABEL: load_3d_tfe_lwe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v7, v2
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_3d_tfe_lwe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v7, v2
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_3d_tfe_lwe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v7, v2
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_3d_tfe_lwe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm tfe lwe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_3d_tfe_lwe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe ; encoding: [0x10,0x1f,0x03,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -127,31 +461,126 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_cube:
-; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
 define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; VERDE-LABEL: load_cube:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_cube:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_cube:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_cube:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_cube_lwe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
+; VERDE-LABEL: load_cube_lwe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v7, v2
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_cube_lwe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v7, v2
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_cube_lwe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v7, v2
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_cube_lwe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm lwe da
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_cube_lwe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm lwe ; encoding: [0x18,0x1f,0x02,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -160,31 +589,122 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_1darray:
-; GFX6789: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
 define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
+; VERDE-LABEL: load_1darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1darray_tfe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
+; VERDE-LABEL: load_1darray_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe da
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1darray_tfe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe da
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1darray_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe da
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1darray_tfe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:1], s[0:7] dmask:0xf unorm tfe da
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1darray_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, s9 ; encoding: [0x09,0x02,0x0e,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm tfe ; encoding: [0x20,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s8 ; encoding: [0x08,0x02,0x0c,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -193,31 +713,126 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_2darray:
-; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
 define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; VERDE-LABEL: load_2darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2darray_lwe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
+; VERDE-LABEL: load_2darray_lwe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v7, v2
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2darray_lwe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v7, v2
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2darray_lwe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v7, v2
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm lwe da
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2darray_lwe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm lwe da
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darray_lwe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm lwe ; encoding: [0x28,0x1f,0x02,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -226,31 +841,126 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_2dmsaa:
-; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
 define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
+; VERDE-LABEL: load_2dmsaa:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2dmsaa:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2dmsaa:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2dmsaa:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2dmsaa_both:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe lwe{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+; VERDE-LABEL: load_2dmsaa_both:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v7, v2
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2dmsaa_both:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v7, v2
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2dmsaa_both:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v7, v2
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2dmsaa_both:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:2], s[0:7] dmask:0xf unorm tfe lwe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2dmsaa_both:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ; encoding: [0x30,0x1f,0x03,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -259,31 +969,130 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_2darraymsaa:
-; GFX6789: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
 define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+; VERDE-LABEL: load_2darraymsaa:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2darraymsaa:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2darraymsaa:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2darraymsaa:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x1f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe da{{$}}
-; GFX10: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+; VERDE-LABEL: load_2darraymsaa_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v8, v3
+; VERDE-NEXT:    v_mov_b32_e32 v7, v2
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_2darraymsaa_tfe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v8, v3
+; FIJI-NEXT:    v_mov_b32_e32 v7, v2
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_2darraymsaa_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v8, v3
+; GFX6789-NEXT:    v_mov_b32_e32 v7, v2
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_2darraymsaa_tfe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load v[0:4], v[0:3], s[0:7] dmask:0xf unorm tfe da
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_2darraymsaa_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v8, v3 ; encoding: [0x03,0x03,0x10,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ; encoding: [0x38,0x1f,0x01,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -292,31 +1101,122 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_mip_1d:
-; GFX6789: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
+; VERDE-LABEL: load_mip_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_1d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_1d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_1d_lwe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe{{$}}
-; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
+; VERDE-LABEL: load_mip_1d_lwe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf unorm lwe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_1d_lwe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf unorm lwe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_1d_lwe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf unorm lwe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_1d_lwe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load_mip v[0:4], v[0:1], s[0:7] dmask:0xf unorm lwe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1d_lwe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, s9 ; encoding: [0x09,0x02,0x0e,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load_mip v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm lwe ; encoding: [0x00,0x1f,0x06,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s8 ; encoding: [0x08,0x02,0x0c,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -325,31 +1225,126 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_mip_2d:
-; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
 define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: load_mip_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_2d_tfe:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v4, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT-NOT: v_mov_b32_e32 v3
-; GFX6789: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm tfe{{$}}
-; GFX10: image_load_mip v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ;
-; SIVI: buffer_store_dword v4, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v4
 define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: load_mip_2d_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v7, v2
+; VERDE-NEXT:    v_mov_b32_e32 v6, v1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2d_tfe:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v5, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v7, v2
+; FIJI-NEXT:    v_mov_b32_e32 v6, v1
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v4, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2d_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v7, v2
+; GFX6789-NEXT:    v_mov_b32_e32 v6, v1
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[5:6], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2d_tfe:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
+; NOPRT-NEXT:    image_load_mip v[0:4], v[0:2], s[0:7] dmask:0xf unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v6, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[5:6], v4, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, v2 ; encoding: [0x02,0x03,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, v1 ; encoding: [0x01,0x03,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s9 ; encoding: [0x09,0x02,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s8 ; encoding: [0x08,0x02,0x14,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load_mip v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x1f,0x05,0xf0,0x05,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -358,14 +1353,51 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; Make sure that error flag is returned even with dmask 0
-; GCN-LABEL: {{^}}load_1d_V2_tfe_dmask0:
-; GCN: v_mov_b32_e32 v1, 0
-; PRT-DAG: v_mov_b32_e32 v2, v1
-; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
 define amdgpu_ps float @load_1d_V2_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_V2_tfe_dmask0:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v1, 0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v1
+; VERDE-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v2
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_V2_tfe_dmask0:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v1, 0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v1
+; FIJI-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v2
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_V2_tfe_dmask0:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v1
+; GFX6789-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v2
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_V2_tfe_dmask0:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
+; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V2_tfe_dmask0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0 ; encoding: [0x80,0x02,0x02,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v2, v1 ; encoding: [0x01,0x03,0x04,0x7e]
+; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x11,0x01,0xf0,0x00,0x01,0x00,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.err = extractvalue {<2 x float>, i32} %v, 1
@@ -373,13 +1405,51 @@ main_body:
   ret float %vv
 }
 
-; GCN-LABEL: {{^}}load_1d_V1_tfe_dmask0:
-; GCN: v_mov_b32_e32 v1, 0
-; PRT-DAG: v_mov_b32_e32 v2, v1
-; PRT: image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT: image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe{{$}}
 define amdgpu_ps float @load_1d_V1_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_V1_tfe_dmask0:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v1, 0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v1
+; VERDE-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v2
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_V1_tfe_dmask0:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v1, 0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v1
+; FIJI-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v2
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_V1_tfe_dmask0:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v1
+; GFX6789-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 unorm tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v2
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_V1_tfe_dmask0:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
+; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x1 unorm tfe
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V1_tfe_dmask0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0 ; encoding: [0x80,0x02,0x02,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v2, v1 ; encoding: [0x01,0x03,0x04,0x7e]
+; GFX10-NEXT:    image_load v[1:2], v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x11,0x01,0xf0,0x00,0x01,0x00,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32 0, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.err = extractvalue {float, i32} %v, 1
@@ -387,13 +1457,51 @@ main_body:
   ret float %vv
 }
 
-; GCN-LABEL: {{^}}load_mip_2d_tfe_dmask0:
-; GCN: v_mov_b32_e32 v3, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
-; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 define amdgpu_ps float @load_mip_2d_tfe_dmask0(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: load_mip_2d_tfe_dmask0:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v3, 0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v3
+; VERDE-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v4
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2d_tfe_dmask0:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v3, 0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v3
+; FIJI-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v4
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2d_tfe_dmask0:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v3, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v3
+; GFX6789-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v4
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2d_tfe_dmask0:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
+; NOPRT-NEXT:    image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d_tfe_dmask0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x11,0x05,0xf0,0x00,0x03,0x00,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 0, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
   %v.err = extractvalue {<4 x float>, i32} %v, 1
@@ -401,14 +1509,51 @@ main_body:
   ret float %vv
 }
 
-; Do not make dmask 0 even if no result (other than tfe) is used.
-; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse:
-; GCN: v_mov_b32_e32 v3, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
-; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 define amdgpu_ps float @load_mip_2d_tfe_nouse(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: load_mip_2d_tfe_nouse:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v3, 0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v3
+; VERDE-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v4
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2d_tfe_nouse:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v3, 0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v3
+; FIJI-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v4
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2d_tfe_nouse:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v3, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v3
+; GFX6789-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v4
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2d_tfe_nouse:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
+; NOPRT-NEXT:    image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d_tfe_nouse:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x11,0x05,0xf0,0x00,0x03,0x00,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
   %v.err = extractvalue {<4 x float>, i32} %v, 1
@@ -416,13 +1561,51 @@ main_body:
   ret float %vv
 }
 
-; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V2:
-; GCN: v_mov_b32_e32 v3, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
-; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 unorm tfe{{$}}
 define amdgpu_ps float @load_mip_2d_tfe_nouse_V2(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: load_mip_2d_tfe_nouse_V2:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v3, 0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v3
+; VERDE-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v4
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2d_tfe_nouse_V2:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v3, 0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v3
+; FIJI-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v4
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2d_tfe_nouse_V2:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v3, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v3
+; GFX6789-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v4
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2d_tfe_nouse_V2:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
+; NOPRT-NEXT:    image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x1 unorm tfe
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d_tfe_nouse_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x11,0x05,0xf0,0x00,0x03,0x00,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32 6, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
   %v.err = extractvalue {<2 x float>, i32} %v, 1
@@ -430,13 +1613,51 @@ main_body:
   ret float %vv
 }
 
-; GCN-LABEL: {{^}}load_mip_2d_tfe_nouse_V1:
-; GCN: v_mov_b32_e32 v3, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v3
-; PRT: image_load_mip v[3:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}}
-; NOPRT-NOT: v_mov_b32_e32 v2
-; NOPRT: image_load_mip v[2:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 unorm tfe{{$}}
 define amdgpu_ps float @load_mip_2d_tfe_nouse_V1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: load_mip_2d_tfe_nouse_V1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v3, 0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v3
+; VERDE-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 unorm tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v4
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2d_tfe_nouse_V1:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v3, 0
+; FIJI-NEXT:    v_mov_b32_e32 v4, v3
+; FIJI-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 unorm tfe
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v4
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2d_tfe_nouse_V1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v3, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v3
+; GFX6789-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 unorm tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v4
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2d_tfe_nouse_V1:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
+; NOPRT-NEXT:    image_load_mip v[2:3], v[0:2], s[0:7] dmask:0x2 unorm tfe
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2d_tfe_nouse_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v4, v3 ; encoding: [0x03,0x03,0x08,0x7e]
+; GFX10-NEXT:    image_load_mip v[3:4], v[0:2], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D unorm tfe ; encoding: [0x08,0x12,0x05,0xf0,0x00,0x03,0x00,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v4 ; encoding: [0x04,0x03,0x00,0x7e]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {float, i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32 2, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
   %v.err = extractvalue {float, i32} %v, 1
@@ -444,21 +1665,78 @@ main_body:
   ret float %vv
 }
 
-; Check for dmask being materially smaller than return type
-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask3:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v3, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; NOPRT-NOT: v_mov_b32_e32 v2
-; GFX6789: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 unorm tfe{{$}}
-; GFX10: image_load v[0:3], v{{[0-9]+}}, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ;
-; SIVI: buffer_store_dword v3, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v3
 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; VERDE-LABEL: load_1d_tfe_V4_dmask3:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    image_load v[0:3], v4, s[0:7] dmask:0x7 unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v3, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_tfe_V4_dmask3:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v4, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    image_load v[0:3], v4, s[0:7] dmask:0x7 unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v3, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_tfe_V4_dmask3:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    image_load v[0:3], v4, s[0:7] dmask:0x7 unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v4, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v5, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[4:5], v3, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_tfe_V4_dmask3:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
+; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0x7 unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v4, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v5, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[4:5], v3, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_tfe_V4_dmask3:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v5, s9 ; encoding: [0x09,0x02,0x0a,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    image_load v[0:3], v4, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x17,0x01,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v4, s8 ; encoding: [0x08,0x02,0x08,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[4:5], v3, off ; encoding: [0x00,0x80,0x70,0xdc,0x04,0x03,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -467,18 +1745,74 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask2:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v2, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; NOPRT-NOT: v_mov_b32_e32 v1
-; GFX6789: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 unorm tfe{{$}}
-; GFX10: image_load v[0:2], v{{[0-9]+}}, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ;
-; SIVI: buffer_store_dword v2, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v2
 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; VERDE-LABEL: load_1d_tfe_V4_dmask2:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    image_load v[0:2], v3, s[0:7] dmask:0x6 unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v2, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_tfe_V4_dmask2:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v3, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    image_load v[0:2], v3, s[0:7] dmask:0x6 unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v2, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_tfe_V4_dmask2:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    image_load v[0:2], v3, s[0:7] dmask:0x6 unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v3, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v4, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[3:4], v2, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_tfe_V4_dmask2:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v2, 0
+; NOPRT-NEXT:    image_load v[0:2], v0, s[0:7] dmask:0x6 unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v3, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v4, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[3:4], v2, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_tfe_V4_dmask2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, s9 ; encoding: [0x09,0x02,0x08,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    image_load v[0:2], v3, s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x16,0x01,0xf0,0x03,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v3, s8 ; encoding: [0x08,0x02,0x06,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[3:4], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -487,16 +1821,70 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_1d_tfe_V4_dmask1:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v1, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
-; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
-; SIVI: buffer_store_dword v1, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
 define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; VERDE-LABEL: load_1d_tfe_V4_dmask1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v1, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_tfe_V4_dmask1:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v1, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_tfe_V4_dmask1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v2, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v3, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[2:3], v1, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_tfe_V4_dmask1:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
+; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x8 unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v2, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v3, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[2:3], v1, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_tfe_V4_dmask1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, s9 ; encoding: [0x09,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x18,0x01,0xf0,0x02,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v2, s8 ; encoding: [0x08,0x02,0x04,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[2:3], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x01,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -505,16 +1893,70 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}load_1d_tfe_V2_dmask1:
-; PRT: v_mov_b32_e32 v0, 0
-; PRT-DAG: v_mov_b32_e32 v{{[0-9]+}}, v0
-; NOPRT: v_mov_b32_e32 v1, 0
-; NOPRT-NOT: v_mov_b32_e32 v0
-; GFX6789: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 unorm tfe{{$}}
-; GFX10: image_load v[0:1], v{{[0-9]+}}, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ;
-; SIVI: buffer_store_dword v1, off, s[8:11], 0
-; GFX900: global_store_dword v[{{[0-9]+:[0-9]+}}], v1
 define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+; VERDE-LABEL: load_1d_tfe_V2_dmask1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe
+; VERDE-NEXT:    s_mov_b32 s11, 0xf000
+; VERDE-NEXT:    s_mov_b32 s10, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v1, off, s[8:11], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_tfe_V2_dmask1:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    v_mov_b32_e32 v2, v0
+; FIJI-NEXT:    v_mov_b32_e32 v0, 0
+; FIJI-NEXT:    v_mov_b32_e32 v1, v0
+; FIJI-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe
+; FIJI-NEXT:    s_mov_b32 s11, 0xf000
+; FIJI-NEXT:    s_mov_b32 s10, -1
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    buffer_store_dword v1, off, s[8:11], 0
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_tfe_V2_dmask1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 unorm tfe
+; GFX6789-NEXT:    v_mov_b32_e32 v2, s8
+; GFX6789-NEXT:    v_mov_b32_e32 v3, s9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[2:3], v1, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_tfe_V2_dmask1:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    v_mov_b32_e32 v1, 0
+; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x8 unorm tfe
+; NOPRT-NEXT:    v_mov_b32_e32 v2, s8
+; NOPRT-NEXT:    v_mov_b32_e32 v3, s9
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    global_store_dword v[2:3], v1, off
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_tfe_V2_dmask1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, s9 ; encoding: [0x09,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    image_load v[0:1], v2, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm tfe ; encoding: [0x00,0x18,0x01,0xf0,0x02,0x00,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v2, s8 ; encoding: [0x08,0x02,0x04,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[2:3], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x01,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
   %v.vec = extractvalue {<2 x float>, i32} %v, 0
@@ -524,348 +1966,1276 @@ main_body:
 }
 
 
-; GCN-LABEL: {{^}}load_mip_3d:
-; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
 define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
+; VERDE-LABEL: load_mip_3d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_3d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_3d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_3d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_cube:
-; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
 define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+; VERDE-LABEL: load_mip_cube:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_cube:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_cube:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_cube:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_1darray:
-; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
 define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
+; VERDE-LABEL: load_mip_1darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_1darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_1darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_1darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_mip_2darray:
-; GFX6789: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
 define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+; VERDE-LABEL: load_mip_2darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_mip_2darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_mip_2darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_mip_2darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_mip_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x04,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}store_1d:
-; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+; VERDE-LABEL: store_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2d:
-; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
 define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+; VERDE-LABEL: store_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_2d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_2d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_3d:
-; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
 define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
+; VERDE-LABEL: store_3d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_3d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_3d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_3d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_cube:
-; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
 define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
+; VERDE-LABEL: store_cube:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_cube:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_cube:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_cube:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1darray:
-; GFX6789: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
 define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
+; VERDE-LABEL: store_1darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2darray:
-; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
 define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
+; VERDE-LABEL: store_2darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_2darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_2darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_2darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2dmsaa:
-; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
 define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
+; VERDE-LABEL: store_2dmsaa:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_2dmsaa:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_2dmsaa:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_2dmsaa:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_2darraymsaa:
-; GFX6789: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
 define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+; VERDE-LABEL: store_2darraymsaa:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_2darraymsaa:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_2darraymsaa:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_2darraymsaa:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_1d:
-; GFX6789: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
+; VERDE-LABEL: store_mip_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_mip_1d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_mip_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_mip_1d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_2d:
-; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
 define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
+; VERDE-LABEL: store_mip_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_mip_2d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_mip_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_mip_2d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_3d:
-; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
 define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
+; VERDE-LABEL: store_mip_3d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_mip_3d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_mip_3d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_mip_3d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_cube:
-; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
 define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+; VERDE-LABEL: store_mip_cube:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_mip_cube:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_mip_cube:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_mip_cube:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_1darray:
-; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
 define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
+; VERDE-LABEL: store_mip_1darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_mip_1darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_mip_1darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_mip_1darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_mip_2darray:
-; GFX6789: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
 define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
+; VERDE-LABEL: store_mip_2darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_mip_2darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_mip_2darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_mip_2darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_mip_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x24,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}getresinfo_1d:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_1d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_1d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2d:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ;
 define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_2d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_2d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_3d:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ;
 define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_3d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_3d:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_3d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_3d:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm ; encoding: [0x10,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_cube:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ;
 define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_cube:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_cube:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_cube:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_cube:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm ; encoding: [0x18,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_1darray:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ;
 define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_1darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_1darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_1darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_1darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm ; encoding: [0x20,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2darray:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ;
 define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_2darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_2darray:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_2darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_2darray:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2dmsaa:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ;
 define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_2dmsaa:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_2dmsaa:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_2dmsaa:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_2dmsaa:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2dmsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm ; encoding: [0x30,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
-; GFX6789: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
-; GFX10: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ;
 define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
+; VERDE-LABEL: getresinfo_2darraymsaa:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_2darraymsaa:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_2darraymsaa:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_2darraymsaa:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_2darraymsaa:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm ; encoding: [0x38,0x1f,0x38,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_V1:
-; GFX6789: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
-; GFX10: image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_V1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_V1:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_V1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_V1:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v0, v0, s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x18,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret float %v
 }
 
-; GCN-LABEL: {{^}}load_1d_V2:
-; GFX6789: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
-; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_V2:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_V2:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_V2:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_V2:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:1], v0, s[0:7] dmask:0x9 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x19,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret <2 x float> %v
 }
 
-; GCN-LABEL: {{^}}store_1d_V1:
-; GFX6789: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
-; GFX10: image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
+; VERDE-LABEL: store_1d_V1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1d_V1:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1d_V1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1d_V1:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v0, v1, s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x12,0x20,0xf0,0x01,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1d_V2:
-; GFX6789: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
-; GFX10: image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
+; VERDE-LABEL: store_1d_V2:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1d_V2:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1d_V2:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1d_V2:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:1], v2, s[0:7] dmask:0xc dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1c,0x20,0xf0,0x02,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret void
 }
 
-; GCN-LABEL: {{^}}load_1d_glc:
-; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
-; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
 define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_glc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_glc:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_glc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_glc:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x3f,0x00,0xf0,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_slc:
-; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
-; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
 define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_slc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_slc:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_slc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_slc:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ; encoding: [0x00,0x1f,0x00,0xf2,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}load_1d_glc_slc:
-; GFX6789: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
-; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
 define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
+; VERDE-LABEL: load_1d_glc_slc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: load_1d_glc_slc:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: load_1d_glc_slc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: load_1d_glc_slc:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: load_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ; encoding: [0x00,0x3f,0x00,0xf2,0x00,0x00,0x00,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}store_1d_glc:
-; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
-; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ;
 define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+; VERDE-LABEL: store_1d_glc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1d_glc:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1d_glc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1d_glc:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc ; encoding: [0x00,0x3f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1d_slc:
-; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
-; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ;
 define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+; VERDE-LABEL: store_1d_slc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1d_slc:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1d_slc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1d_slc:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm slc ; encoding: [0x00,0x1f,0x20,0xf2,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
   ret void
 }
 
-; GCN-LABEL: {{^}}store_1d_glc_slc:
-; GFX6789: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
-; GFX10: image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ;
 define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+; VERDE-LABEL: store_1d_glc_slc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: store_1d_glc_slc:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: store_1d_glc_slc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: store_1d_glc_slc:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: store_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc slc ; encoding: [0x00,0x3f,0x20,0xf2,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
   ret void
 }
 
-; GCN-LABEL: {{^}}getresinfo_dmask0:
-; GCN-NOT: image
-; GCN: ; return to shader part epilog
 define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
+; VERDE-LABEL: getresinfo_dmask0:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: getresinfo_dmask0:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: getresinfo_dmask0:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: getresinfo_dmask0:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: getresinfo_dmask0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
   ret <4 x float> %r
 }
 
-; Ideally, the register allocator would avoid the wait here
 ;
-; GCN-LABEL: {{^}}image_store_wait:
-; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf
-; SI: s_waitcnt expcnt(0)
-; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf
-; GCN: s_waitcnt vmcnt(0)
-; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf
 define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
+; VERDE-LABEL: image_store_wait:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt expcnt(0)
+; VERDE-NEXT:    image_load v[0:3], v4, s[8:15] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+; VERDE-NEXT:    s_endpgm
+;
+; FIJI-LABEL: image_store_wait:
+; FIJI:       ; %bb.0: ; %main_body
+; FIJI-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; FIJI-NEXT:    image_load v[0:3], v4, s[8:15] dmask:0xf unorm
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+; FIJI-NEXT:    s_endpgm
+;
+; GFX6789-LABEL: image_store_wait:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; GFX6789-NEXT:    image_load v[0:3], v4, s[8:15] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+; GFX6789-NEXT:    s_endpgm
+;
+; NOPRT-LABEL: image_store_wait:
+; NOPRT:       ; %bb.0: ; %main_body
+; NOPRT-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
+; NOPRT-NEXT:    image_load v[0:3], v4, s[8:15] dmask:0xf unorm
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    image_store v[0:3], v4, s[16:23] dmask:0xf unorm
+; NOPRT-NEXT:    s_endpgm
+;
+; GFX10-LABEL: image_store_wait:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+; GFX10-NEXT:    image_load v[0:3], v4, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    image_store v[0:3], v4, s[16:23] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x04,0x00]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 main_body:
   call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
   %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
@@ -873,12 +3243,61 @@ main_body:
   ret void
 }
 
-; SI won't merge ds memory operations, because of the signed offset bug, so
-; we only have check lines for VI+.
-; GFX8910-LABEL: image_load_mmo
-; GFX8910: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
-; GFX8910: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
 define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
+; VERDE-LABEL: image_load_mmo:
+; VERDE:       ; %bb.0:
+; VERDE-NEXT:    image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
+; VERDE-NEXT:    v_mov_b32_e32 v2, 0
+; VERDE-NEXT:    s_mov_b32 m0, -1
+; VERDE-NEXT:    v_add_i32_e32 v3, vcc, 16, v0
+; VERDE-NEXT:    ds_write_b32 v0, v2
+; VERDE-NEXT:    ds_write_b32 v3, v2
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v1
+; VERDE-NEXT:    s_waitcnt lgkmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; FIJI-LABEL: image_load_mmo:
+; FIJI:       ; %bb.0:
+; FIJI-NEXT:    image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
+; FIJI-NEXT:    v_mov_b32_e32 v3, 0
+; FIJI-NEXT:    s_mov_b32 m0, -1
+; FIJI-NEXT:    ds_write2_b32 v0, v3, v3 offset1:4
+; FIJI-NEXT:    s_waitcnt vmcnt(0)
+; FIJI-NEXT:    v_mov_b32_e32 v0, v1
+; FIJI-NEXT:    s_waitcnt lgkmcnt(0)
+; FIJI-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: image_load_mmo:
+; GFX6789:       ; %bb.0:
+; GFX6789-NEXT:    image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
+; GFX6789-NEXT:    v_mov_b32_e32 v3, 0
+; GFX6789-NEXT:    ds_write2_b32 v0, v3, v3 offset1:4
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v1
+; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; NOPRT-LABEL: image_load_mmo:
+; NOPRT:       ; %bb.0:
+; NOPRT-NEXT:    image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
+; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
+; NOPRT-NEXT:    ds_write2_b32 v0, v3, v3 offset1:4
+; NOPRT-NEXT:    s_waitcnt vmcnt(0)
+; NOPRT-NEXT:    v_mov_b32_e32 v0, v1
+; NOPRT-NEXT:    s_waitcnt lgkmcnt(0)
+; NOPRT-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_load_mmo:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    image_load v1, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm ; encoding: [0x08,0x11,0x00,0xf0,0x01,0x01,0x00,0x00]
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0 ; encoding: [0x80,0x02,0x06,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    ds_write2_b32 v0, v3, v3 offset1:4 ; encoding: [0x00,0x04,0x38,0xd8,0x00,0x03,0x03,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
   store float 0.000000e+00, float addrspace(3)* %lds
   %c0 = extractelement <2 x i32> %c, i32 0
   %c1 = extractelement <2 x i32> %c, i32 1

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
index de2a775bc893..89b23c7742cb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -41,7 +41,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_c_cl_2d:
-; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+; GCN: image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -57,7 +57,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_c_b_2d:
-; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+; GCN: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -65,7 +65,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_b_cl_2d:
-; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+; GCN: image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -89,7 +89,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_c_l_2d:
-; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
+; GCN: image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16{{$}}
 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index b9eed11c8588..2e4c38bae533 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -4,8 +4,8 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
-; UNPACKED: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}}
-; PACKED: image_gather4_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+; UNPACKED: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}}
+; PACKED: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 d16{{$}}
 ; GFX10: image_gather4_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D d16{{$}}
 define amdgpu_ps <2 x float> @image_gather4_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
index bb71ee00b73e..df33d2b082b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
@@ -12,7 +12,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_cube:
-; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da{{$}}
 ; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE ;
 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
 main_body:
@@ -21,7 +21,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_2darray:
-; GFX6789: image_gather4 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 da{{$}}
+; GFX6789: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da{{$}}
 ; GFX10: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ;
 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
 main_body:
@@ -30,7 +30,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_c_2d:
-; GFX6789: image_gather4_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 ; GFX10: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
 main_body:
@@ -39,7 +39,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_cl_2d:
-; GFX6789: image_gather4_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 ; GFX10: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
 main_body:
@@ -57,7 +57,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_b_2d:
-; GFX6789: image_gather4_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 ; GFX10: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
 main_body:
@@ -93,7 +93,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_l_2d:
-; GFX6789: image_gather4_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 ; GFX10: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
 main_body:
@@ -120,7 +120,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_c_lz_2d:
-; GFX6789: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GFX6789: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 ; GFX10: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
index 712c29917464..3034d155a250 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 
 ; GCN-LABEL: {{^}}gather4_o_2d:
-; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GCN: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -82,7 +82,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_lz_o_2d:
-; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
+; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1{{$}}
 define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
index 52730f639272..89f6b8a3f11c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -15,7 +15,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}sample_3d:
 ; NONSA: v_mov_b32_e32 v3, v0
-; NONSA: image_sample v[0:3], v[1:4],
+; NONSA: image_sample v[0:3], v[1:3],
 ; NSA: image_sample v[0:3], [v1, v2, v0],
 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %r, float %s, float %t) {
 main_body:
@@ -70,7 +70,7 @@ main_body:
 ; GCN-LABEL: {{^}}sample_contig_contig:
 ; GCN: image_sample_c_l v0, v[0:7],
 ; NSA: image_sample v1, v[5:7],
-; NONSA: image_sample v1, v[5:8],
+; NONSA: image_sample v1, v[5:7],
 define amdgpu_ps <2 x float> @sample_contig_contig(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %s2, float %t2, float %r2) {
 main_body:
   %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare, float %s1, float %t1, float %r1, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
index 436401150d29..48a3690ce7da 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -1,374 +1,624 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+; GCN-LABEL: sample_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_2d:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+; GCN-LABEL: sample_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_3d:
-; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
+; GCN-LABEL: sample_3d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v1, v2
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cube:
-; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+; GCN-LABEL: sample_cube:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v1, v2
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_1darray:
-; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}
 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
+; GCN-LABEL: sample_1darray:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_2darray:
-; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
 define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+; GCN-LABEL: sample_2darray:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v1, v2
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_1d:
-; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+; GCN-LABEL: sample_c_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_2d:
-; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+; GCN-LABEL: sample_c_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cl_1d:
-; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
+; GCN-LABEL: sample_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cl_2d:
-; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v1, v2
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cl_1d:
-; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
+; GCN-LABEL: sample_c_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cl_2d:
-; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_c_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v2, v3
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_1d:
-; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
+; GCN-LABEL: sample_b_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_2d:
-; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+; GCN-LABEL: sample_b_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_1d:
-; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
+; GCN-LABEL: sample_c_b_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_2d:
-; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+; GCN-LABEL: sample_c_b_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_cl_1d:
-; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
+; GCN-LABEL: sample_b_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_cl_2d:
-; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_b_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v2, v3
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_cl_1d:
-; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
+; GCN-LABEL: sample_c_b_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_cl_2d:
-; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_c_b_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    s_mov_b64 s[12:13], exec
+; GCN-NEXT:    s_wqm_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v3, v4
+; GCN-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GCN-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_1d:
-; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+; GCN-LABEL: sample_d_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_2d:
-; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+; GCN-LABEL: sample_d_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v3, v4
+; GCN-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-NEXT:    image_sample_d v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABAL: {{^}}sample_d_3d:
-; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
+; GCN-LABEL: sample_d_3d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v4, v3
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
+; GCN-NEXT:    v_mov_b32_e32 v7, v8
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_1d:
-; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+; GCN-LABEL: sample_c_d_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_2d:
-; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+; GCN-LABEL: sample_c_d_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
+; GCN-NEXT:    v_mov_b32_e32 v4, v5
+; GCN-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-NEXT:    image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_cl_1d:
-; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+; GCN-LABEL: sample_d_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_cl_2d:
-; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_d_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
+; GCN-NEXT:    v_mov_b32_e32 v5, v6
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_cl_1d:
-; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+; GCN-LABEL: sample_c_d_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_cl_2d:
-; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_c_d_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v4, v3
+; GCN-NEXT:    v_mov_b32_e32 v6, v7
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_1d:
-; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
+; GCN-LABEL: sample_cd_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_2d:
-; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+; GCN-LABEL: sample_cd_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v3, v4
+; GCN-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-NEXT:    image_sample_cd v[0:3], v[1:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_1d:
-; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
+; GCN-LABEL: sample_c_cd_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_2d:
-; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
+; GCN-LABEL: sample_c_cd_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v2, v1
+; GCN-NEXT:    v_mov_b32_e32 v4, v5
+; GCN-NEXT:    v_mov_b32_e32 v1, v0
+; GCN-NEXT:    image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_cl_1d:
-; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
+; GCN-LABEL: sample_cd_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_cl_2d:
-; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_cd_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v3, v2
+; GCN-NEXT:    v_mov_b32_e32 v5, v6
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
-; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
+; GCN-LABEL: sample_c_cd_cl_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
-; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
+; GCN-LABEL: sample_c_cd_cl_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v4, v3
+; GCN-NEXT:    v_mov_b32_e32 v6, v7
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_l_1d:
-; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
+; GCN-LABEL: sample_l_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_l_2d:
-; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
+; GCN-LABEL: sample_l_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v1, v2
+; GCN-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_l_1d:
-; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
+; GCN-LABEL: sample_c_l_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_l_2d:
-; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
+; GCN-LABEL: sample_c_l_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v2, v3
+; GCN-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_lz_1d:
-; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+; GCN-LABEL: sample_lz_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_lz_2d:
-; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+; GCN-LABEL: sample_lz_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_lz_1d:
-; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+; GCN-LABEL: sample_c_lz_1d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_lz_2d:
-; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
 define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+; GCN-LABEL: sample_c_lz_2d:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
-; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}
 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+; GCN-LABEL: sample_c_d_o_2darray_V1:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v4, v2
+; GCN-NEXT:    v_mov_b32_e32 v7, v8
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret float %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
-; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}
 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
+; GCN-LABEL: sample_c_d_o_2darray_V2:
+; GCN:       ; %bb.0: ; %main_body
+; GCN-NEXT:    v_mov_b32_e32 v5, v4
+; GCN-NEXT:    v_mov_b32_e32 v4, v2
+; GCN-NEXT:    v_mov_b32_e32 v7, v8
+; GCN-NEXT:    v_mov_b32_e32 v3, v1
+; GCN-NEXT:    v_mov_b32_e32 v2, v0
+; GCN-NEXT:    image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <2 x float> %v

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
index a6c54af535c2..192dd233eb20 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -1,22 +1,120 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX81,GFX89 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX9,GFX89 %s
-; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=TONGA %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GFX81 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s
 
-; GCN-LABEL: {{^}}image_sample_2d_f16:
-; GFX89: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16{{$}}
-; GFX10: image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D d16{{$}}
 define amdgpu_ps half @image_sample_2d_f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; TONGA-LABEL: image_sample_2d_f16:
+; TONGA:       ; %bb.0: ; %main_body
+; TONGA-NEXT:    s_mov_b64 s[12:13], exec
+; TONGA-NEXT:    s_wqm_b64 exec, exec
+; TONGA-NEXT:    s_and_b64 exec, exec, s[12:13]
+; TONGA-NEXT:    image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_2d_f16:
+; GFX81:       ; %bb.0: ; %main_body
+; GFX81-NEXT:    s_mov_b64 s[12:13], exec
+; GFX81-NEXT:    s_wqm_b64 exec, exec
+; GFX81-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16
+; GFX81-NEXT:    s_waitcnt vmcnt(0)
+; GFX81-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_2d_f16:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 d16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_2d_f16:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample v0, v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %tex = call half @llvm.amdgcn.image.sample.2d.f16.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   ret half %tex
 }
 
-; GCN-LABEL: {{^}}image_sample_2d_f16_tfe:
-; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0
-; PACKED: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16{{$}}
-; UNPACKED: image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16{{$}}
 define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, i32 addrspace(1)* inreg %out) {
+; TONGA-LABEL: image_sample_2d_f16_tfe:
+; TONGA:       ; %bb.0: ; %main_body
+; TONGA-NEXT:    s_mov_b64 s[14:15], exec
+; TONGA-NEXT:    s_wqm_b64 exec, exec
+; TONGA-NEXT:    v_mov_b32_e32 v2, 0
+; TONGA-NEXT:    v_mov_b32_e32 v4, s12
+; TONGA-NEXT:    v_mov_b32_e32 v5, s13
+; TONGA-NEXT:    v_mov_b32_e32 v3, v2
+; TONGA-NEXT:    s_and_b64 exec, exec, s[14:15]
+; TONGA-NEXT:    image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_mov_b32_e32 v0, v2
+; TONGA-NEXT:    flat_store_dword v[4:5], v3
+; TONGA-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; TONGA-NEXT:    ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_2d_f16_tfe:
+; GFX81:       ; %bb.0: ; %main_body
+; GFX81-NEXT:    s_mov_b64 s[14:15], exec
+; GFX81-NEXT:    s_wqm_b64 exec, exec
+; GFX81-NEXT:    v_mov_b32_e32 v2, 0
+; GFX81-NEXT:    v_mov_b32_e32 v4, s12
+; GFX81-NEXT:    v_mov_b32_e32 v5, s13
+; GFX81-NEXT:    v_mov_b32_e32 v3, v2
+; GFX81-NEXT:    s_and_b64 exec, exec, s[14:15]
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16
+; GFX81-NEXT:    s_waitcnt vmcnt(0)
+; GFX81-NEXT:    v_mov_b32_e32 v0, v2
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    flat_store_dword v[4:5], v3
+; GFX81-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX81-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_2d_f16_tfe:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[14:15], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    v_mov_b32_e32 v4, s12
+; GFX9-NEXT:    v_mov_b32_e32 v5, s13
+; GFX9-NEXT:    v_mov_b32_e32 v3, v2
+; GFX9-NEXT:    s_and_b64 exec, exec, s[14:15]
+; GFX9-NEXT:    image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 tfe d16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-NEXT:    global_store_dword v[4:5], v3, off
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_2d_f16_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s14, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-NEXT:    v_mov_b32_e32 v4, s12
+; GFX10-NEXT:    v_mov_b32_e32 v5, s13
+; GFX10-NEXT:    v_mov_b32_e32 v3, v2
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
+; GFX10-NEXT:    image_sample v[2:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe d16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_mov_b32_e32 v0, v2
+; GFX10-NEXT:    global_store_dword v[4:5], v3, off
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %tex = call {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
   %tex.vec = extractvalue {half, i32} %tex, 0
@@ -25,22 +123,84 @@ main_body:
   ret half %tex.vec
 }
 
-; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16:
-; UNPACKED: image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}}
-; PACKED: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16{{$}}
-; GFX10: image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D d16{{$}}
 define amdgpu_ps float @image_sample_c_d_1d_v2f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; TONGA-LABEL: image_sample_c_d_1d_v2f16:
+; TONGA:       ; %bb.0: ; %main_body
+; TONGA-NEXT:    image_sample_c_d v[0:1], v[0:3], s[0:7], s[8:11] dmask:0x3 d16
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; TONGA-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT:    ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_c_d_1d_v2f16:
+; GFX81:       ; %bb.0: ; %main_body
+; GFX81-NEXT:    image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16
+; GFX81-NEXT:    s_waitcnt vmcnt(0)
+; GFX81-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_c_d_1d_v2f16:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 d16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_c_d_1d_v2f16:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d v0, v[0:3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D d16
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %tex = call <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %r = bitcast <2 x half> %tex to float
   ret float %r
 }
 
-; GCN-LABEL: {{^}}image_sample_c_d_1d_v2f16_tfe:
-; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0
-; UNPACKED: image_sample_c_d v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16{{$}}
-; PACKED: image_sample_c_d v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16{{$}}
 define amdgpu_ps <2 x float> @image_sample_c_d_1d_v2f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; TONGA-LABEL: image_sample_c_d_1d_v2f16_tfe:
+; TONGA:       ; %bb.0: ; %main_body
+; TONGA-NEXT:    v_mov_b32_e32 v4, 0
+; TONGA-NEXT:    v_mov_b32_e32 v5, v4
+; TONGA-NEXT:    v_mov_b32_e32 v6, v4
+; TONGA-NEXT:    image_sample_c_d v[4:6], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v0, 16, v5
+; TONGA-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT:    v_mov_b32_e32 v1, v6
+; TONGA-NEXT:    ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_c_d_1d_v2f16_tfe:
+; GFX81:       ; %bb.0: ; %main_body
+; GFX81-NEXT:    v_mov_b32_e32 v4, 0
+; GFX81-NEXT:    v_mov_b32_e32 v5, v4
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    image_sample_c_d v[4:5], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16
+; GFX81-NEXT:    s_waitcnt vmcnt(0)
+; GFX81-NEXT:    v_mov_b32_e32 v0, v4
+; GFX81-NEXT:    v_mov_b32_e32 v1, v5
+; GFX81-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_c_d_1d_v2f16_tfe:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_mov_b32_e32 v5, v4
+; GFX9-NEXT:    image_sample_c_d v[4:5], v[0:3], s[0:7], s[8:11] dmask:0x3 tfe d16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, v4
+; GFX9-NEXT:    v_mov_b32_e32 v1, v5
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_c_d_1d_v2f16_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    v_mov_b32_e32 v4, v1
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0
+; GFX10-NEXT:    image_sample_c_d v[0:1], [v5, v4, v2, v3], s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe d16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %tex = call {<2 x half>,i32} @llvm.amdgcn.image.sample.c.d.1d.v2f16i32.f32.f32(i32 3, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
   %tex.vec = extractvalue {<2 x half>, i32} %tex, 0
@@ -52,22 +212,120 @@ main_body:
   ret <2 x float> %r
 }
 
-; GCN-LABEL: {{^}}image_sample_b_2d_v4f16:
-; UNPACKED: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}}
-; PACKED: image_sample_b v[0:1], v[0:3], s[0:7], s[8:11] dmask:0xf d16{{$}}
-; GFX10: image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D d16{{$}}
 define amdgpu_ps <2 x float> @image_sample_b_2d_v4f16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+; TONGA-LABEL: image_sample_b_2d_v4f16:
+; TONGA:       ; %bb.0: ; %main_body
+; TONGA-NEXT:    s_mov_b64 s[12:13], exec
+; TONGA-NEXT:    s_wqm_b64 exec, exec
+; TONGA-NEXT:    s_and_b64 exec, exec, s[12:13]
+; TONGA-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf d16
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; TONGA-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; TONGA-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT:    v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT:    ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_b_2d_v4f16:
+; GFX81:       ; %bb.0: ; %main_body
+; GFX81-NEXT:    s_mov_b64 s[12:13], exec
+; GFX81-NEXT:    s_wqm_b64 exec, exec
+; GFX81-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf d16
+; GFX81-NEXT:    s_waitcnt vmcnt(0)
+; GFX81-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_b_2d_v4f16:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf d16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_b_2d_v4f16:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_b v[0:1], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D d16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %tex = call <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
   %r = bitcast <4 x half> %tex to <2 x float>
   ret <2 x float> %r
 }
 
-; GCN-LABEL: {{^}}image_sample_b_2d_v4f16_tfe:
-; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0
-; UNPACKED: image_sample_b v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0xf tfe d16{{$}}
-; PACKED: image_sample_b v[{{[0-9]+:[0-9]+}}], v[0:3], s[0:7], s[8:11] dmask:0xf tfe d16{{$}}
 define amdgpu_ps <4 x float> @image_sample_b_2d_v4f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+; TONGA-LABEL: image_sample_b_2d_v4f16_tfe:
+; TONGA:       ; %bb.0: ; %main_body
+; TONGA-NEXT:    s_mov_b64 s[12:13], exec
+; TONGA-NEXT:    s_wqm_b64 exec, exec
+; TONGA-NEXT:    v_mov_b32_e32 v3, 0
+; TONGA-NEXT:    v_mov_b32_e32 v4, v3
+; TONGA-NEXT:    v_mov_b32_e32 v5, v3
+; TONGA-NEXT:    v_mov_b32_e32 v6, v3
+; TONGA-NEXT:    v_mov_b32_e32 v7, v3
+; TONGA-NEXT:    s_and_b64 exec, exec, s[12:13]
+; TONGA-NEXT:    image_sample_b v[3:7], v[0:2], s[0:7], s[8:11] dmask:0xf tfe d16
+; TONGA-NEXT:    s_waitcnt vmcnt(0)
+; TONGA-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
+; TONGA-NEXT:    v_lshlrev_b32_e32 v1, 16, v6
+; TONGA-NEXT:    v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT:    v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; TONGA-NEXT:    v_mov_b32_e32 v2, v7
+; TONGA-NEXT:    ; return to shader part epilog
+;
+; GFX81-LABEL: image_sample_b_2d_v4f16_tfe:
+; GFX81:       ; %bb.0: ; %main_body
+; GFX81-NEXT:    s_mov_b64 s[12:13], exec
+; GFX81-NEXT:    s_wqm_b64 exec, exec
+; GFX81-NEXT:    v_mov_b32_e32 v3, 0
+; GFX81-NEXT:    v_mov_b32_e32 v4, v3
+; GFX81-NEXT:    v_mov_b32_e32 v5, v3
+; GFX81-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX81-NEXT:    s_nop 0
+; GFX81-NEXT:    image_sample_b v[3:5], v[0:2], s[0:7], s[8:11] dmask:0xf tfe d16
+; GFX81-NEXT:    s_waitcnt vmcnt(0)
+; GFX81-NEXT:    v_mov_b32_e32 v0, v3
+; GFX81-NEXT:    v_mov_b32_e32 v1, v4
+; GFX81-NEXT:    v_mov_b32_e32 v2, v5
+; GFX81-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: image_sample_b_2d_v4f16_tfe:
+; GFX9:       ; %bb.0: ; %main_body
+; GFX9-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-NEXT:    s_wqm_b64 exec, exec
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-NEXT:    v_mov_b32_e32 v4, v3
+; GFX9-NEXT:    v_mov_b32_e32 v5, v3
+; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-NEXT:    image_sample_b v[3:5], v[0:2], s[0:7], s[8:11] dmask:0xf tfe d16
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, v3
+; GFX9-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-NEXT:    v_mov_b32_e32 v2, v5
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: image_sample_b_2d_v4f16_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    v_mov_b32_e32 v5, v2
+; GFX10-NEXT:    v_mov_b32_e32 v4, v1
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT:    image_sample_b v[0:2], v[3:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe d16
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %tex = call {<4 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v4f16i32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
   %tex.vec = extractvalue {<4 x half>, i32} %tex, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 621a834d3e21..59419a65c70e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -1,25 +1,99 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6789 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10 %s
 
-; GCN-LABEL: {{^}}sample_1d:
-; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GCN: v_mov_b32_e32 v2, v0
-; GCN: v_mov_b32_e32 v3, v0
-; GCN: v_mov_b32_e32 v4, v0
-; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe{{$}}
-; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+; VERDE-LABEL: sample_1d_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[16:17], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    s_mov_b32 s15, 0xf000
+; VERDE-NEXT:    s_mov_b32 s14, -1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[16:17]
+; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s12
+; GFX6789-NEXT:    v_mov_b32_e32 v7, s13
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
+; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[6:7], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0f,0x81,0xf0,0x05,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -28,12 +102,43 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_1:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe{{$}}
-; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x81,0xf0,0x02,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -45,12 +150,43 @@ main_body:
   ret <2 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_2:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe{{$}}
-; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x02,0x81,0xf0,0x02,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -62,12 +198,43 @@ main_body:
   ret <2 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_3:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe{{$}}
-; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_3:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x04,0x81,0xf0,0x02,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -79,12 +246,43 @@ main_body:
   ret <2 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_4:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GFX6789: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe{{$}}
-; GFX10: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_4:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x08,0x81,0xf0,0x02,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -96,13 +294,46 @@ main_body:
   ret <2 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_12:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GCN: v_mov_b32_e32 v2, v0
-; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe{{$}}
-; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x03,0x81,0xf0,0x03,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -116,13 +347,46 @@ main_body:
   ret <4 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_24:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GCN: v_mov_b32_e32 v2, v0
-; GFX6789: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe{{$}}
-; GFX10: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0a,0x81,0xf0,0x03,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -136,14 +400,49 @@ main_body:
   ret <4 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_tfe_adjust_writemask_134:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GCN: v_mov_b32_e32 v2, v0
-; GCN: v_mov_b32_e32 v3, v0
-; GFX6789: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe{{$}}
-; GFX10: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ;
 define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_tfe_adjust_writemask_134:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x0d,0x81,0xf0,0x04,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %res.vec = extractvalue {<4 x float>,i32} %v, 0
@@ -159,15 +458,64 @@ main_body:
   ret <4 x float> %res
 }
 
-; GCN-LABEL: {{^}}sample_1d_lwe:
-; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
-; GCN: v_mov_b32_e32 v2, v0
-; GCN: v_mov_b32_e32 v3, v0
-; GCN: v_mov_b32_e32 v4, v0
-; GFX6789: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe{{$}}
-; GFX10: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ;
 define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+; VERDE-LABEL: sample_1d_lwe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[16:17], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    v_mov_b32_e32 v5, v0
+; VERDE-NEXT:    v_mov_b32_e32 v0, 0
+; VERDE-NEXT:    s_mov_b32 s15, 0xf000
+; VERDE-NEXT:    s_mov_b32 s14, -1
+; VERDE-NEXT:    v_mov_b32_e32 v1, v0
+; VERDE-NEXT:    v_mov_b32_e32 v2, v0
+; VERDE-NEXT:    v_mov_b32_e32 v3, v0
+; VERDE-NEXT:    v_mov_b32_e32 v4, v0
+; VERDE-NEXT:    s_and_b64 exec, exec, s[16:17]
+; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_lwe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v6, s12
+; GFX6789-NEXT:    v_mov_b32_e32 v7, s13
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
+; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
+; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[6:7], v4, off
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_lwe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s14, exec_lo ; encoding: [0x7e,0x03,0x8e,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    v_mov_b32_e32 v5, v0 ; encoding: [0x00,0x03,0x0a,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v6, s12 ; encoding: [0x0c,0x02,0x0c,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v7, s13 ; encoding: [0x0d,0x02,0x0e,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v3, v0 ; encoding: [0x00,0x03,0x06,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v4, v0 ; encoding: [0x00,0x03,0x08,0x7e]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14 ; encoding: [0x7e,0x0e,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe ; encoding: [0x00,0x0f,0x82,0xf0,0x05,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[6:7], v4, off ; encoding: [0x00,0x80,0x70,0xdc,0x06,0x04,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
   %v.vec = extractvalue {<4 x float>, i32} %v, 0
@@ -176,406 +524,1274 @@ main_body:
   ret <4 x float> %v.vec
 }
 
-; GCN-LABEL: {{^}}sample_2d:
-; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; VERDE-LABEL: sample_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_3d:
-; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ;
 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
+; VERDE-LABEL: sample_3d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_3d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_3d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cube:
-; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
-; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ;
 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
+; VERDE-LABEL: sample_cube:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cube:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cube:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_1darray:
-; GFX6789: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da{{$}}
-; GFX10: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ;
 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
+; VERDE-LABEL: sample_1darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x20,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_2darray:
-; GFX6789: image_sample v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf da{{$}}
-; GFX10: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ;
 define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
+; VERDE-LABEL: sample_2darray:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_2darray:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_2darray:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x0f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_1d:
-; GFX6789: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; VERDE-LABEL: sample_c_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_2d:
-; GFX6789: image_sample_c v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; VERDE-LABEL: sample_c_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cl_1d:
-; GFX6789: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
+; VERDE-LABEL: sample_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cl_2d:
-; GFX6789: image_sample_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x84,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cl_1d:
-; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
+; VERDE-LABEL: sample_c_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cl_2d:
-; GFX6789: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_c_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_1d:
-; GFX6789: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
+; VERDE-LABEL: sample_b_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_b_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_2d:
-; GFX6789: image_sample_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+; VERDE-LABEL: sample_b_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_b_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x94,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_1d:
-; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
+; VERDE-LABEL: sample_c_b_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_b_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_2d:
-; GFX6789: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
+; VERDE-LABEL: sample_c_b_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_b_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb4,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_cl_1d:
-; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
+; VERDE-LABEL: sample_b_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_b_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_b_cl_2d:
-; GFX6789: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_b_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_b_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_b_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x98,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_cl_1d:
-; GFX6789: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
+; VERDE-LABEL: sample_c_b_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_b_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_b_cl_2d:
-; GFX6789: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_c_b_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_b_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_b_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample_c_b_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_1d:
-; GFX6789: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; VERDE-LABEL: sample_d_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_d_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_2d:
-; GFX6789: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; VERDE-LABEL: sample_d_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_d_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x88,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_1d:
-; GFX6789: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; VERDE-LABEL: sample_c_d_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_2d:
-; GFX6789: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; VERDE-LABEL: sample_c_d_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_cl_1d:
-; GFX6789: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+; VERDE-LABEL: sample_d_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_d_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_d_cl_2d:
-; GFX6789: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_d_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_d_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x8c,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_cl_1d:
-; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+; VERDE-LABEL: sample_c_d_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_cl_2d:
-; GFX6789: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_c_d_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_1d:
-; GFX6789: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; VERDE-LABEL: sample_cd_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cd_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_2d:
-; GFX6789: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; VERDE-LABEL: sample_cd_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cd_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa0,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_1d:
-; GFX6789: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; VERDE-LABEL: sample_c_cd_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_cd_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_2d:
-; GFX6789: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; VERDE-LABEL: sample_c_cd_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_cd_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa8,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_cl_1d:
-; GFX6789: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+; VERDE-LABEL: sample_cd_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cd_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_cd_cl_2d:
-; GFX6789: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_cd_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_cd_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xa4,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
-; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+; VERDE-LABEL: sample_c_cd_cl_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_cd_cl_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_cl_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
-; GFX6789: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; VERDE-LABEL: sample_c_cd_cl_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_cd_cl_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_cd_cl_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xac,0xf1,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_l_1d:
-; GFX6789: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+; VERDE-LABEL: sample_l_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_l_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_l_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_l_2d:
-; GFX6789: image_sample_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+; VERDE-LABEL: sample_l_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_l_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_l_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x90,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_l_1d:
-; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+; VERDE-LABEL: sample_c_l_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_l_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_l_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_l_2d:
-; GFX6789: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+; VERDE-LABEL: sample_c_l_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_l_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_l_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xb0,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_lz_1d:
-; GFX6789: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_lz_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_lz_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_lz_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_lz_2d:
-; GFX6789: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; VERDE-LABEL: sample_lz_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_lz_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_lz_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x9c,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_lz_1d:
-; GFX6789: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ;
 define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; VERDE-LABEL: sample_c_lz_1d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_lz_1d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_lz_1d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_lz_2d:
-; GFX6789: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-; GFX10: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ;
 define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; VERDE-LABEL: sample_c_lz_2d:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_lz_2d:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_lz_2d:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
-; GFX6789: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da{{$}}
-; GFX10: image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ;
 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
+; VERDE-LABEL: sample_c_d_o_2darray_V1:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_o_2darray_V1:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_o_2darray_V1:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d_o v0, v[0:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x04,0xe8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret float %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1_tfe:
-; GFX6789: image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da{{$}}
-; GFX10: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ;
 define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
+; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v9, 0
+; VERDE-NEXT:    v_mov_b32_e32 v10, v9
+; VERDE-NEXT:    image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
+; VERDE-NEXT:    s_mov_b32 s15, 0xf000
+; VERDE-NEXT:    s_mov_b32 s14, -1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v9
+; VERDE-NEXT:    buffer_store_dword v10, off, s[12:15], 0
+; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v9, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v10, v9
+; GFX6789-NEXT:    image_sample_c_d_o v[9:10], v[0:15], s[0:7], s[8:11] dmask:0x4 tfe da
+; GFX6789-NEXT:    v_mov_b32_e32 v0, s12
+; GFX6789-NEXT:    v_mov_b32_e32 v1, s13
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    global_store_dword v[0:1], v10, off
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v10, v0 ; encoding: [0x00,0x03,0x14,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v9, v1 ; encoding: [0x01,0x03,0x12,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v11, s13 ; encoding: [0x0d,0x02,0x16,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x04,0xe9,0xf0,0x0a,0x00,0x40,0x00,0x09,0x02,0x03,0x04,0x05,0x06,0x07,0x08]
+; GFX10-NEXT:    v_mov_b32_e32 v10, s12 ; encoding: [0x0c,0x02,0x14,0x7e]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    global_store_dword v[10:11], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x0a,0x01,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %v.vec = extractvalue {float, i32} %v, 0
@@ -584,19 +1800,67 @@ main_body:
   ret float %v.vec
 }
 
-; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
-; GFX6789: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da{{$}}
-; GFX10: image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ;
 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
+; VERDE-LABEL: sample_c_d_o_2darray_V2:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_o_2darray_V2:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_o_2darray_V2:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    image_sample_c_d_o v[0:1], v[0:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x28,0x06,0xe8,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <2 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2_tfe:
-; GFX6789: image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da{{$}}
-; GFX10: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ;
 define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
+; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    v_mov_b32_e32 v9, 0
+; VERDE-NEXT:    v_mov_b32_e32 v10, v9
+; VERDE-NEXT:    v_mov_b32_e32 v11, v9
+; VERDE-NEXT:    image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    v_mov_b32_e32 v0, v9
+; VERDE-NEXT:    v_mov_b32_e32 v1, v10
+; VERDE-NEXT:    v_mov_b32_e32 v2, v11
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    v_mov_b32_e32 v9, 0
+; GFX6789-NEXT:    v_mov_b32_e32 v10, v9
+; GFX6789-NEXT:    v_mov_b32_e32 v11, v9
+; GFX6789-NEXT:    image_sample_c_d_o v[9:11], v[0:15], s[0:7], s[8:11] dmask:0x6 tfe da
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    v_mov_b32_e32 v0, v9
+; GFX6789-NEXT:    v_mov_b32_e32 v1, v10
+; GFX6789-NEXT:    v_mov_b32_e32 v2, v11
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    v_mov_b32_e32 v11, v0 ; encoding: [0x00,0x03,0x16,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v9, v2 ; encoding: [0x02,0x03,0x12,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v10, v1 ; encoding: [0x01,0x03,0x14,0x7e]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0 ; encoding: [0x00,0x03,0x02,0x7e]
+; GFX10-NEXT:    v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e]
+; GFX10-NEXT:    image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v4, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; encoding: [0x2c,0x06,0xe9,0xf0,0x0b,0x00,0x40,0x00,0x0a,0x09,0x03,0x04,0x05,0x06,0x07,0x08]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
   %v.vec = extractvalue {<2 x float>, i32} %v, 0
@@ -610,125 +1874,456 @@ main_body:
   ret <4 x float> %res.2
 }
 
-; GCN-LABEL: {{^}}sample_1d_unorm:
-; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm{{$}}
-; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ;
 define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_unorm:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_unorm:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_unorm:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x1f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_1d_glc:
-; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc{{$}}
-; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ;
 define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_glc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_glc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_glc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x2f,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_1d_slc:
-; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc{{$}}
-; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ;
 define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_slc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_slc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; encoding: [0x00,0x0f,0x80,0xf2,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}sample_1d_glc_slc:
-; GFX6789: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc{{$}}
-; GFX10: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ;
 define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: sample_1d_glc_slc:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: sample_1d_glc_slc:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: sample_1d_glc_slc:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc ; encoding: [0x00,0x2f,0x80,0xf2,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
   ret <4 x float> %v
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_0:
-; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
 define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_0:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_0:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_0:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %elt0 = extractelement <4 x float> %r, i32 0
   ret float %elt0
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_01
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
 define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_01:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_01:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_01:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x03,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_012
-; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
 define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_012:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_012:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_012:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_12
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
 define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_12:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_12:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_12:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_03
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
 define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_03:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_03:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_03:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x09,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
   ret <2 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_13
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
 define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_13:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_13:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_13:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
   ret <2 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_123
-; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
 define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_123:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_123:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_123:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0e,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
   ret <3 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled
-; GCN-NOT: image
 define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_none_enabled:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_none_enabled:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %r
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
 define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_123_to_12:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_123_to_12:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x06,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x float> %out
 }
 
-; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13
-; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
 define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; VERDE-LABEL: adjust_writemask_sample_013_to_13:
+; VERDE:       ; %bb.0: ; %main_body
+; VERDE-NEXT:    s_mov_b64 s[12:13], exec
+; VERDE-NEXT:    s_wqm_b64 exec, exec
+; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
+; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
+; VERDE-NEXT:    s_waitcnt vmcnt(0)
+; VERDE-NEXT:    ; return to shader part epilog
+;
+; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
+; GFX6789:       ; %bb.0: ; %main_body
+; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
+; GFX6789-NEXT:    s_wqm_b64 exec, exec
+; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
+; GFX6789-NEXT:    s_waitcnt vmcnt(0)
+; GFX6789-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: adjust_writemask_sample_013_to_13:
+; GFX10:       ; %bb.0: ; %main_body
+; GFX10-NEXT:    s_mov_b32 s12, exec_lo ; encoding: [0x7e,0x03,0x8c,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo ; encoding: [0x7e,0x09,0xfe,0xbe]
+; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12 ; encoding: [0x7e,0x0c,0x7e,0x87]
+; GFX10-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x0a,0x80,0xf0,0x00,0x00,0x40,0x00]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    ; return to shader part epilog
 main_body:
   %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
index 2d66a0be0690..330e2f4ce149 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
@@ -27,7 +27,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_c_l_2d:
-; GCN: image_sample_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -43,7 +43,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_l_o_2d:
-; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -51,7 +51,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_c_l_o_1d:
-; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -75,7 +75,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_c_l_2d:
-; GCN: image_gather4_c_lz v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -83,7 +83,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}gather4_l_o_2d:
-; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
index fc6ce8faa0c4..17fbd49564ce 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
@@ -10,7 +10,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_o_2d:
-; GCN: image_sample_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -18,7 +18,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_c_o_1d:
-; GCN: image_sample_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_c_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -34,7 +34,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_cl_o_1d:
-; GCN: image_sample_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_cl_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_cl_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %clamp) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -66,7 +66,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_b_o_1d:
-; GCN: image_sample_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_b_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_b_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -258,7 +258,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_l_o_1d:
-; GCN: image_sample_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_l_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -298,7 +298,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_lz_o_2d:
-; GCN: image_sample_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
@@ -306,7 +306,7 @@ main_body:
 }
 
 ; GCN-LABEL: {{^}}sample_c_lz_o_1d:
-; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
+; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
 define amdgpu_ps <4 x float> @sample_c_lz_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)