[llvm] 8fc8bf5 - [AMDGPU] Add GFX11 test coverage sharing checks with GFX10

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 8 04:00:48 PDT 2022


Author: Jay Foad
Date: 2022-07-08T11:56:49+01:00
New Revision: 8fc8bf59f2eb86307f678ae5d8c6e03b11f4bc00

URL: https://github.com/llvm/llvm-project/commit/8fc8bf59f2eb86307f678ae5d8c6e03b11f4bc00
DIFF: https://github.com/llvm/llvm-project/commit/8fc8bf59f2eb86307f678ae5d8c6e03b11f4bc00.diff

LOG: [AMDGPU] Add GFX11 test coverage sharing checks with GFX10

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
    llvm/test/CodeGen/AMDGPU/add.ll
    llvm/test/CodeGen/AMDGPU/add_i1.ll
    llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
    llvm/test/CodeGen/AMDGPU/basic-branch.ll
    llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
    llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll
    llvm/test/CodeGen/AMDGPU/csr-gfx10.ll
    llvm/test/CodeGen/AMDGPU/fdiv.ll
    llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
    llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
    llvm/test/CodeGen/AMDGPU/hsa.ll
    llvm/test/CodeGen/AMDGPU/immv216.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
    llvm/test/CodeGen/AMDGPU/mad.u16.ll
    llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
    llvm/test/CodeGen/AMDGPU/mul.ll
    llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
    llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
    llvm/test/CodeGen/AMDGPU/v_cndmask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
index c20ebf492c19..d907560ff445 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
@@ -1,17 +1,18 @@
-; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX678,GFX6789 %s
-; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9,GFX6789 %s
-; RUN: llc -global-isel -mcpu=gfx1010 -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
+; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX678,GFX6789 %s
+; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX6789 %s
+; RUN: llc -global-isel -mcpu=gfx1010 -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel -mcpu=gfx1100 -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
 
 declare i64 @llvm.smax.i64(i64, i64)
 declare i64 @llvm.smin.i64(i64, i64)
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16
+; GCN-LABEL: {{^}}v_clamp_i64_i16
 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
+; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
 ; GFX10: v_mov_b32_e32 [[B]], 0x7fff
 ; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
 define i16 @v_clamp_i64_i16(i64 %in) #0 {
@@ -22,13 +23,13 @@ entry:
   ret i16 %result
 }
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_reverse
+; GCN-LABEL: {{^}}v_clamp_i64_i16_reverse
 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
+; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
 ; GFX10: v_mov_b32_e32 [[B]], 0x7fff
 ; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]] 
 define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 {
@@ -39,7 +40,7 @@ entry:
   ret i16 %result
 }
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_invalid_lower
+; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower
 ; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
 ; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
 ; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc
@@ -54,7 +55,7 @@ entry:
   ret i16 %result
 }
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher
+; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher
 ; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
 ; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
 ; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
@@ -66,13 +67,13 @@ entry:
   ret i16 %result
 }
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short
+; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short
 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
+; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
 ; GFX10: v_mov_b32_e32 [[B]], 0x100
 ; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
 define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 {
@@ -83,13 +84,13 @@ entry:
   ret i16 %result
 }
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse
+; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse
 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
 ; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
 ; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
-; GFX10: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
+; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
 ; GFX10: v_mov_b32_e32 [[B]], 0x100
 ; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
 define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 {
@@ -100,7 +101,7 @@ entry:
   ret i16 %result
 }
 
-; GFX10-LABEL: {{^}}v_clamp_i64_i16_zero
+; GCN-LABEL: {{^}}v_clamp_i64_i16_zero
 ; GFX6789: v_mov_b32_e32 v0, 0
 ; GFX10: v_mov_b32_e32 v0, 0
 define i16 @v_clamp_i64_i16_zero(i64 %in) #0 {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index c301c2c7405e..fa246079e7d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -5,6 +5,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
 ; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -18,7 +19,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
 ; CO-V2: enable_vgpr_workitem_id = 0
 
 ; ALL-NOT: v0
-; ALL: {{buffer|flat|global}}_store_dword {{.*}}v0
+; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
 
 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0
 define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
@@ -37,7 +38,7 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
 ; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1
 
 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
-; PACKED-TID: {{buffer|flat|global}}_store_dword {{.*}}[[ID]]
+; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1
 define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
   %id = call i32 @llvm.amdgcn.workitem.id.y()
@@ -55,7 +56,7 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
 ; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2
 
 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
-; PACKED-TID: {{buffer|flat|global}}_store_dword {{.*}}[[ID]]
+; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2
 define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
   %id = call i32 @llvm.amdgcn.workitem.id.z()
@@ -65,9 +66,9 @@ define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
 
 ; ALL-LABEL: {{^}}test_workitem_id_x_usex2:
 ; ALL-NOT: v0
-; ALL: {{flat|global}}_store_dword v{{.*}}, v0
+; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
 ; ALL-NOT: v0
-; ALL: {{flat|global}}_store_dword v{{.*}}, v0
+; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
 define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 {
   %id0 = call i32 @llvm.amdgcn.workitem.id.x()
   store volatile i32 %id0, i32 addrspace(1)* %out
@@ -79,9 +80,9 @@ define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 {
 
 ; ALL-LABEL: {{^}}test_workitem_id_x_use_outside_entry:
 ; ALL-NOT: v0
-; ALL: {{flat|global}}_store_dword
+; ALL: {{flat|global}}_store_{{dword|b32}}
 ; ALL-NOT: v0
-; ALL: {{flat|global}}_store_dword v{{.*}}, v0
+; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
 define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(i32 addrspace(1)* %out, i32 %arg) #1 {
 bb0:
   store volatile i32 0, i32 addrspace(1)* %out
@@ -136,8 +137,8 @@ define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 {
 ; PACKED: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ff, v0
 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
 
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work_group_size !0 {
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
   %id.y = call i32 @llvm.amdgcn.workitem.id.y()
@@ -152,14 +153,14 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work
 ; CO-V2: enable_vgpr_workitem_id = 1
 
 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 
 ; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
 
 ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 10
 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
 
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work_group_size !1 {
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
   %id.y = call i32 @llvm.amdgcn.workitem.id.y()
@@ -174,8 +175,8 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work
 ; CO-V2: enable_vgpr_workitem_id = 2
 
 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 
 ; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
 

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
index 26fc2929635d..0fbfe4cb6f35 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
@@ -3,6 +3,8 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s
 
 ; FUNC-LABEL: {{^}}system_one_as_acquire:
 ; GCN:        %bb.0

diff  --git a/llvm/test/CodeGen/AMDGPU/add.ll b/llvm/test/CodeGen/AMDGPU/add.ll
index 112d7880242c..19d72274dc5d 100644
--- a/llvm/test/CodeGen/AMDGPU/add.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.ll
@@ -2,11 +2,12 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,FUNC %s
 
 ; FUNC-LABEL: {{^}}s_add_i32:
 ; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
 ; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]]
-; GCN: buffer_store_dword v[[V_REG]],
+; GCN: buffer_store_{{dword|b32}} v[[V_REG]],
 define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
   %a = load i32, i32 addrspace(1)* %in
@@ -83,8 +84,8 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v_add_i32:
-; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[A:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[B:v[0-9]+]]
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]]
 ; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
 ; GFX10: v_add_nc_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
@@ -100,7 +101,7 @@ define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
 }
 
 ; FUNC-LABEL: {{^}}v_add_imm_i32:
-; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[A:v[0-9]+]]
 ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]]
 ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
 ; GFX10: v_add_nc_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
@@ -172,7 +173,7 @@ endif:
 ; GFX10: v_add_nc_u32_e32 v0, s0, v0
 
 ; GCN: ; def vcc
-; GCN: ds_write_b32
+; GCN: ds_{{write|store}}_b32
 ; GCN: ; use vcc
 define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
   %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"()

diff  --git a/llvm/test/CodeGen/AMDGPU/add_i1.ll b/llvm/test/CodeGen/AMDGPU/add_i1.ll
index 9fc077b9442d..7910490d31c3 100644
--- a/llvm/test/CodeGen/AMDGPU/add_i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/add_i1.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}add_var_var_i1:
 ; GFX9:  s_xor_b64

diff  --git a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
index 3b4ecad617cb..15cf20a8079f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
@@ -2,6 +2,8 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdpal -mcpu=kaveri | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF
 ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 %s
 
 ; ELF: Section {
 ; ELF: Name: .text

diff  --git a/llvm/test/CodeGen/AMDGPU/basic-branch.ll b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
index 12c90c3b1f5c..6e2533706639 100644
--- a/llvm/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
 ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope  -check-prefix=GCNNOOPT -check-prefix=GCN %s
 ; RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
 
@@ -11,7 +12,7 @@
 
 ; GCNNOOPT: v_readlane_b32
 ; GCNNOOPT: v_readlane_b32
-; GCN: buffer_store_dword
+; GCN: buffer_store_{{dword|b32}}
 ; GCNNOOPT: s_endpgm
 
 ; GCN: {{^}}[[END]]:
@@ -29,14 +30,14 @@ end:
 }
 
 ; GCN-LABEL: {{^}}test_brcc_i1:
-; GCN: s_load_dword [[VAL:s[0-9]+]]
+; GCN: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
 ; GCNNOOPT: s_mov_b32 [[ONE:s[0-9]+]], 1{{$}}
 ; GCNNOOPT: s_and_b32 s{{[0-9]+}}, [[VAL]], [[ONE]]
 ; GCNOPT:   s_bitcmp0_b32 [[VAL]], 0
 ; GCNNOOPT: s_cmp_eq_u32
 ; GCN: s_cbranch_scc1 [[END:.LBB[0-9]+_[0-9]+]]
 
-; GCN: buffer_store_dword
+; GCN: buffer_store_{{dword|b32}}
 
 ; GCN: {{^}}[[END]]:
 ; GCN: s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
index a2c35b97aef6..7d2a022a53fc 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
 
 ; For gfx1010, overestimate the branch size in case we need to insert
 ; a nop for the buggy offset.
@@ -21,7 +22,7 @@
 ; GCN: s_cbranch_scc1
 
 ; GCN: [[ENDBB]]:
-; GCN: global_store_dword
+; GCN: global_store_{{dword|b32}}
 define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
 bb0:
   %cmp0 = icmp eq i32 %cnd0, 0
@@ -70,7 +71,7 @@ bb3:
 ; GCN: s_cbranch_execz
 
 ; GCN: [[ENDBB]]:
-; GCN: global_store_dword
+; GCN: global_store_{{dword|b32}}
 define void @long_forward_exec_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
 bb0:
   %cmp0 = icmp eq i32 %cnd0, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll
index b40c2da40f29..d0eecc6973e7 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-inst-size-gfx10.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-s-branch-bits=4 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 
 ; Make sure the code size estimate for inline asm is 12-bytes per
 ; instruction, rather than 8 in previous generations.

diff  --git a/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll b/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll
index 4c036280a145..deebe06490b3 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/csr-gfx10.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ; Make sure new higher SGPRs are callee saved
 ; GFX10-LABEL: {{^}}callee_new_sgprs:

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll
index a46009926e12..d7673e8ae7bb 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,PREGFX10,FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
 ; These tests check that fdiv is expanded correctly and also test that the
@@ -106,7 +107,7 @@ entry:
 ; GCN-NOT: [[RESULT]]
 ; PREGFX10-NOT: s_setreg
 ; GFX10-NOT: s_denorm_mode
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: buffer_store_{{dword|b32}} [[RESULT]]
 define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
 entry:
   %fdiv = fdiv fast float %a, %b
@@ -121,7 +122,7 @@ entry:
 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
 ; GCN-NOT: [[RESULT]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: buffer_store_{{dword|b32}} [[RESULT]]
 define amdgpu_kernel void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv fast float %a, %b
@@ -136,7 +137,7 @@ entry:
 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
 ; GCN-NOT: [[RESULT]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: buffer_store_{{dword|b32}} [[RESULT]]
 define amdgpu_kernel void @fdiv_ulp25_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv fast float %a, %b, !fpmath !0
@@ -151,7 +152,7 @@ entry:
 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
 ; GCN-NOT: [[RESULT]]
-; GCN: buffer_store_dword [[RESULT]]
+; GCN: buffer_store_{{dword|b32}} [[RESULT]]
 define amdgpu_kernel void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
 entry:
   %fdiv = fdiv arcp ninf float %a, %b

diff  --git a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
index fad4ad80264b..b186290cf40f 100644
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1010 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1010 %s
 
 ; GCN-LABEL: {{^}}addMul2D:
 ; GFX1010: v_fmac_f16

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll b/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
index 7336a58a1f83..f222dca46844 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}test_add_lit:
-; GFX10: v_add_co_u32 v{{[0-9]+}}, vcc_lo, 0x80992bff, v{{[0-9]+}}
+; GFX10: v_add_co_u32{{(_e64)?}} v{{[0-9]+}}, vcc_lo, 0x80992bff, v{{[0-9]+}}
 ; GFX10: v_add_co_ci_u32_e32 v{{[0-9]+}}, vcc_lo, 0xe7, v{{[0-9]+}}, vcc_lo
 ; GFX9:  v_mov_b32_e32 [[C2:v[0-9]+]], 0xe7
 ; GFX9:  v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0x80992bff, v{{[0-9]+}}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
index 7955dcc434c3..3dc9f209d01e 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
 
 ; GCN:      amdhsa.kernels:
 ; GCN:      .name: wavefrontsize

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll
index 26c2f035e154..61672ef1b9ad 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa.ll
@@ -6,6 +6,8 @@
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 | llvm-readobj -S --sd --syms - | FileCheck %s --check-prefix=ELF
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W32 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 | FileCheck --check-prefix=GFX10 --check-prefix=GFX10-W64 %s
 
 ; The SHT_NOTE section contains the output from the .hsa_code_object_*
 ; directives.
@@ -56,7 +58,7 @@
 
 ; HSA: call_convention = -1
 ; HSA: .end_amd_kernel_code_t
-; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
+; HSA: s_load_{{dwordx2|b64}} s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
 
 ; Make sure we are setting the ATC bit:
 ; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
@@ -64,7 +66,7 @@
 ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
 ; Make sure we generate flat store for HSA
 ; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
-; GFX10: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
+; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
 
 ; HSA: .Lfunc_end0:
 ; HSA: .size   simple, .Lfunc_end0-simple

diff  --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index 2cc6ea123fe6..493b2c4b6611 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -1,3 +1,4 @@
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
@@ -6,7 +7,7 @@
 
 ; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 {
   store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out
   ret void
@@ -14,7 +15,7 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)
 
 ; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -22,7 +23,7 @@ define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -30,7 +31,7 @@ define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out
 
 ; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out
   ret void
@@ -38,7 +39,7 @@ define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out
   ret void
@@ -46,7 +47,7 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -54,7 +55,7 @@ define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -62,7 +63,7 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -70,7 +71,7 @@ define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -78,7 +79,7 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -86,7 +87,7 @@ define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -94,7 +95,7 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out
   ret void
@@ -102,7 +103,7 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118 ; encoding
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out
   ret void
@@ -110,7 +111,7 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace
 
 ; GCN-LABEL: {{^}}store_literal_imm_v2f16:
 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00
-; GCN: buffer_store_dword [[REG]]
+; GCN: buffer_store_{{dword|b32}} [[REG]]
 define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 4096.0, half 4096.0>, <2 x half> addrspace(1)* %out
   ret void
@@ -138,9 +139,9 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
-; GFX10: s_load_dword [[VAL:s[0-9]+]]
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
 ; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe0,0x01,0x08]
-; GFX10: buffer_store_dword [[REG]]
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
 
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x06,0xe0,0x01,0x08]
@@ -163,9 +164,9 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
-; GFX10: s_load_dword [[VAL:s[0-9]+]]
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
 ; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe2,0x01,0x08]
-; GFX10: buffer_store_dword [[REG]]
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
 
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x06,0xe2,0x01,0x08]
@@ -188,6 +189,10 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding:
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -209,6 +214,10 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -231,6 +240,10 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -252,6 +265,10 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -273,6 +290,10 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -294,6 +315,10 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -315,6 +340,10 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16:
+; GFX10: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
 ; GFX9: buffer_store_dword [[REG]]
@@ -357,6 +386,10 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -378,6 +411,10 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out,
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -400,6 +437,10 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out,
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
@@ -422,6 +463,10 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
+; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1
+; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_add_i32 [[VAL:s[0-9]+]], s6, -1
 ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
 ; GFX9: buffer_store_dword [[REG]]
@@ -439,6 +484,10 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
+; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe
+; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_add_i32 [[VAL:s[0-9]+]], s6, 0xfffefffe
 ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
 ; GFX9: buffer_store_dword [[REG]]
@@ -456,6 +505,10 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
+; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfff0fff0
+; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_add_i32 [[VAL:s[0-9]+]], s6, 0xfff0fff0
 ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
 ; GFX9: buffer_store_dword [[REG]]
@@ -474,6 +527,10 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_63_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
 ; GFX9: buffer_store_dword [[REG]]
@@ -495,6 +552,10 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_64_v2f16:
+; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
+; GFX10: buffer_store_{{dword|b32}} [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
 ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
 ; GFX9: buffer_store_dword [[REG]]

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index ce1d58b92ef8..497a9c2cd62f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}gs_const:
 ; GCN-NOT: v_cmpx
@@ -21,7 +22,7 @@ define amdgpu_gs void @gs_const() {
 ; GCN-LABEL: {{^}}vcc_implicit_def:
 ; GCN: v_cmp_nle_f32_e32 vcc, 0, v{{[0-9]+}}
 ; GCN: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
-; GCN: s_andn2_b64 exec, exec, vcc
+; GCN: s_and{{n2|_not1}}_b64 exec, exec, vcc
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
 define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
   %tmp0 = fcmp olt float %arg13, 0.000000e+00
@@ -55,7 +56,7 @@ define amdgpu_gs void @false() {
 ; GCN: v_cmp_lt_i32
 ; GCN: s_or_b64 s[0:1]
 ; GCN: s_xor_b64 s[0:1], s[0:1], exec
-; GCN: s_andn2_b64 s[2:3], s[2:3], s[0:1]
+; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
 ; GCN: s_and_b64 exec, exec, s[2:3]
 define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
   %c1 = icmp slt i32 %a, %b
@@ -70,7 +71,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; GCN: v_cmp_lt_i32
 ; GCN: v_cmp_lt_i32
 ; GCN: s_xor_b64 s[0:1]
-; GCN: s_andn2_b64 s[2:3], s[2:3], s[0:1]
+; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
 ; GCN: s_and_b64 exec, exec, s[2:3]
 define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
   %c1 = icmp slt i32 %a, %b
@@ -238,7 +239,7 @@ define amdgpu_ps void @fcmp_x2(float %a) #0 {
 ; GCN-DAG: s_wqm_b64 s[2:3], vcc
 ; GCN-DAG: s_mov_b64 s[0:1], exec
 ; GCN: s_xor_b64 s[2:3], s[2:3], exec
-; GCN: s_andn2_b64 s[0:1], s[0:1], s[2:3]
+; GCN: s_and{{n2|_not1}}_b64 s[0:1], s[0:1], s[2:3]
 ; GCN: s_and_b64 exec, exec, s[0:1]
 define amdgpu_ps float @wqm(float %a) {
   %c1 = fcmp une float %a, 0.0
@@ -288,7 +289,7 @@ endloop15:                                        ; preds = %loop3
 ; If kill is marked as defining VCC then this will fail with live interval issues.
 ; GCN-LABEL: {{^}}kill_with_loop_exit:
 ; GCN: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
-; GCN: s_andn2_b64 [[LIVE]], [[LIVE]], exec
+; GCN: s_and{{n2|_not1}}_b64 [[LIVE]], [[LIVE]], exec
 ; GCN-NEXT: s_cbranch_scc0
 define amdgpu_ps void @kill_with_loop_exit(float inreg %inp0, float inreg %inp1, <4 x i32> inreg %inp2, float inreg %inp3) {
 .entry:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
index b45189dd54ce..51816da84d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -1,11 +1,12 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PREGFX10
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,VI,PREGFX10
 ;RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,GFX10
+;RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,GFX10
 
 ;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0{{$}}
-;CHECK: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc{{$}}
-;CHECK: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc{{$}}
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], off, s[0:3], 0{{$}}
+;CHECK: buffer_load_{{dwordx4|b128}} v[4:7], off, s[0:3], 0 glc{{$}}
+;CHECK: buffer_load_{{dwordx4|b128}} v[8:11], off, s[0:3], 0 slc{{$}}
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
 main_body:
@@ -22,9 +23,9 @@ main_body:
 ;PREGFX10: buffer_load_dwordx4 v[0:3], off, s[0:3], 0{{$}}
 ;PREGFX10: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc{{$}}
 ;PREGFX10: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc{{$}}
-;GFX10: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 dlc{{$}}
-;GFX10: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc{{$}}
-;GFX10: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc dlc{{$}}
+;GFX10: buffer_load_{{dwordx4|b128}} v[0:3], off, s[0:3], 0 dlc{{$}}
+;GFX10: buffer_load_{{dwordx4|b128}} v[4:7], off, s[0:3], 0 glc dlc{{$}}
+;GFX10: buffer_load_{{dwordx4|b128}} v[8:11], off, s[0:3], 0 slc dlc{{$}}
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_dlc(<4 x i32> inreg) {
 main_body:
@@ -38,7 +39,7 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], off, s[0:3], 0 offset:40
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
 main_body:
@@ -48,7 +49,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
 ;CHECK: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1ffc
-;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:4
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], off, s[0:3], [[OFFSET]] offset:4
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
 main_body:
@@ -57,7 +58,7 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_ofs:
-;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], v0, s[0:3], 0 offen
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
 main_body:
@@ -66,7 +67,7 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
-;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], v0, s[0:3], 0 offen offset:60
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
 main_body:
@@ -76,7 +77,7 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_x1:
-;CHECK: buffer_load_dword v0, v0, s[0:3], 0 offen
+;CHECK: buffer_load_{{dword|b32}} v0, v0, s[0:3], 0 offen
 ;CHECK: s_waitcnt
 define amdgpu_ps float @buffer_load_x1(<4 x i32> inreg %rsrc, i32 %ofs) {
 main_body:
@@ -85,7 +86,7 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_x2:
-;CHECK: buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
+;CHECK: buffer_load_{{dwordx2|b64}} v[0:1], v0, s[0:3], 0 offen
 ;CHECK: s_waitcnt
 define amdgpu_ps <2 x float> @buffer_load_x2(<4 x i32> inreg %rsrc, i32 %ofs) {
 main_body:
@@ -96,7 +97,7 @@ main_body:
 ;CHECK-LABEL: {{^}}buffer_load_negative_offset:
 ;PREGFX10: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0
 ;GFX10: v_add_nc_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], -16, v0
-;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], [[VOFS]], s[0:3], 0 offen
 define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) {
 main_body:
   %ofs.1 = add i32 %ofs, -16
@@ -121,8 +122,8 @@ entry:
 ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_and:
 ;CHECK-NEXT: %bb.
 ;GFX10-NEXT: s_clause
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x1_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a) {
 main_body:
@@ -147,8 +148,8 @@ main_body:
 ;CHECK-NEXT: %bb.
 ;CHECK-NEXT: v_lshlrev_b32_e32 v{{[0-9]}}, 6, v0
 ;GFX10-NEXT: s_clause
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:28
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:28
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x1_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp) {
 main_body:
@@ -173,9 +174,9 @@ main_body:
 ;CHECK-LABEL: {{^}}buffer_load_x1_offen_merged_glc_slc:
 ;CHECK-NEXT: %bb.
 ;GFX10-NEXT: s_clause
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4{{$}}
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:12 glc{{$}}
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 glc slc{{$}}
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x1_offen_merged_glc_slc(<4 x i32> inreg %rsrc, i32 %a) {
 main_body:
@@ -198,7 +199,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged_and:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x2_offen_merged_and(<4 x i32> inreg %rsrc, i32 %a) {
 main_body:
@@ -217,7 +218,7 @@ main_body:
 ;CHECK-LABEL: {{^}}buffer_load_x2_offen_merged_or:
 ;CHECK-NEXT: %bb.
 ;CHECK-NEXT: v_lshlrev_b32_e32 v{{[0-9]}}, 4, v0
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], v{{[0-9]}}, s[0:3], 0 offen offset:4
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x2_offen_merged_or(<4 x i32> inreg %rsrc, i32 %inp) {
 main_body:
@@ -237,8 +238,8 @@ main_body:
 ;CHECK-LABEL: {{^}}buffer_load_x1_offset_merged:
 ;CHECK-NEXT: %bb.
 ;GFX10-NEXT: s_clause
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x1_offset_merged(<4 x i32> inreg %rsrc) {
 main_body:
@@ -255,7 +256,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}buffer_load_x2_offset_merged:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
 ;CHECK: s_waitcnt
 define amdgpu_ps void @buffer_load_x2_offset_merged(<4 x i32> inreg %rsrc) {
 main_body:
@@ -270,9 +271,9 @@ main_body:
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_int:
-;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
-;CHECK: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
-;CHECK: buffer_load_dword v6, off, s[0:3], 0 slc
+;CHECK: buffer_load_{{dwordx4|b128}} v[0:3], off, s[0:3], 0
+;CHECK: buffer_load_{{dwordx2|b64}} v[4:5], off, s[0:3], 0 glc
+;CHECK: buffer_load_{{dword|b32}} v6, off, s[0:3], 0 slc
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(<4 x i32> inreg) {
 main_body:
@@ -290,7 +291,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_ubyte:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_ubyte v{{[0-9]}}, off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{ubyte|u8}} v{{[0-9]}}, off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
 ;CHECK-NEXT: ; return to shader part epilog
@@ -304,7 +305,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_i16:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_ushort v{{[0-9]}}, off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{ushort|u16}} v{{[0-9]}}, off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK-NEXT: v_cvt_f32_u32_e32 v0, v0
 ;CHECK-NEXT: ; return to shader part epilog
@@ -318,7 +319,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_sbyte:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_sbyte v{{[0-9]}}, off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{sbyte|i8}} v{{[0-9]}}, off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
 ;CHECK-NEXT: ; return to shader part epilog
@@ -332,7 +333,7 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_sshort:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_sshort v{{[0-9]}}, off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{sshort|i16}} v{{[0-9]}}, off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
 ;CHECK-NEXT: ; return to shader part epilog
@@ -346,9 +347,9 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_f16:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_ushort [[VAL:v[0-9]+]], off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{ushort|u16}} [[VAL:v[0-9]+]], off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: ds_write_b16 v0, [[VAL]]
+;CHECK: ds_{{write|store}}_b16 v0, [[VAL]]
 define amdgpu_ps void @raw_buffer_load_f16(<4 x i32> inreg %rsrc, half addrspace(3)* %ptr) {
 main_body:
   %val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
@@ -358,9 +359,9 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_v2f16:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_dword [[VAL:v[0-9]+]], off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]], off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: ds_write_b32 v0, [[VAL]]
+;CHECK: ds_{{write|store}}_b32 v0, [[VAL]]
 define amdgpu_ps void @raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, <2 x half> addrspace(3)* %ptr) {
 main_body:
   %val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
@@ -370,9 +371,9 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_v4f16:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} [[VAL:v\[[0-9]+:[0-9]+\]]], off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: ds_write_b64 v0, [[VAL]]
+;CHECK: ds_{{write|store}}_b64 v0, [[VAL]]
 define amdgpu_ps void @raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, <4 x half> addrspace(3)* %ptr) {
 main_body:
   %val = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
@@ -382,9 +383,9 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_v2i16:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_dword [[VAL:v[0-9]+]], off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]], off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: ds_write_b32 v0, [[VAL]]
+;CHECK: ds_{{write|store}}_b32 v0, [[VAL]]
 define amdgpu_ps void @raw_buffer_load_v2i16(<4 x i32> inreg %rsrc, <2 x i16> addrspace(3)* %ptr) {
 main_body:
   %val = call <2 x i16> @llvm.amdgcn.raw.buffer.load.v2i16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
@@ -394,9 +395,9 @@ main_body:
 
 ;CHECK-LABEL: {{^}}raw_buffer_load_v4i16:
 ;CHECK-NEXT: %bb.
-;CHECK-NEXT: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], off, s[0:3], 0
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} [[VAL:v\[[0-9]+:[0-9]+\]]], off, s[0:3], 0
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: ds_write_b64 v0, [[VAL]]
+;CHECK: ds_{{write|store}}_b64 v0, [[VAL]]
 define amdgpu_ps void @raw_buffer_load_v4i16(<4 x i32> inreg %rsrc, <4 x i16> addrspace(3)* %ptr) {
 main_body:
   %val = call <4 x i16> @llvm.amdgcn.raw.buffer.load.v4i16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
@@ -407,8 +408,8 @@ main_body:
 ;CHECK-LABEL: {{^}}raw_buffer_load_x1_offset_merged:
 ;CHECK-NEXT: %bb.
 ;GFX10-NEXT: s_clause
-;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
-;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
+;CHECK-NEXT: buffer_load_{{dwordx4|b128}} v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
+;CHECK-NEXT: buffer_load_{{dwordx2|b64}} v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
 ;CHECK: s_waitcnt
 define amdgpu_ps void @raw_buffer_load_x1_offset_merged(<4 x i32> inreg %rsrc) {
 main_body:
@@ -426,12 +427,12 @@ main_body:
 ;CHECK-LABEL: {{^}}raw_buffer_load_x1_offset_swizzled_not_merged:
 ;CHECK-NEXT: %bb.
 ;GFX10-NEXT: s_clause
-;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:4
-;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:8
-;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:12
-;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:16
-;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:28
-;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:32
+;CHECK-NEXT: buffer_load_{{dword|b32}} v{{[0-9]}}, off, s[0:3], 0 offset:4
+;CHECK-NEXT: buffer_load_{{dword|b32}} v{{[0-9]}}, off, s[0:3], 0 offset:8
+;CHECK-NEXT: buffer_load_{{dword|b32}} v{{[0-9]}}, off, s[0:3], 0 offset:12
+;CHECK-NEXT: buffer_load_{{dword|b32}} v{{[0-9]}}, off, s[0:3], 0 offset:16
+;CHECK-NEXT: buffer_load_{{dword|b32}} v{{[0-9]}}, off, s[0:3], 0 offset:28
+;CHECK-NEXT: buffer_load_{{dword|b32}} v{{[0-9]}}, off, s[0:3], 0 offset:32
 ;CHECK: s_waitcnt
 define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> inreg %rsrc) {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
index 8804aeb1f500..c9eec415c6a8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
@@ -2,10 +2,11 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
 
 ; GCN-LABEL: {{^}}tbuffer_load_d16_x:
 ; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; GFX10: tbuffer_load_{{format_d16|d16_format}}_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
 define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
 main_body:
   %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
@@ -17,7 +18,7 @@ main_body:
 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
 
 ; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
 ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]]
 define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
 main_body:
@@ -32,8 +33,8 @@ main_body:
 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
 
 ; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
-; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; PACKED: v_mov_b{{16|32}}_e32 v{{[0-9]+(\.(l|h))?}}, v[[HI]]{{(\.(l|h))?}}
 define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
 main_body:
   %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
@@ -47,7 +48,7 @@ main_body:
 ; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
 
 ; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
-; GFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
+; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT]
 ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
 define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
index ae97f462e569..503a8c70c830 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
@@ -2,11 +2,12 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s
 
 ; GCN-LABEL: {{^}}tbuffer_load_d16_x:
 ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
 ; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
-; GFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; GFX10: tbuffer_load_{{format_d16|d16_format}}_x v{{[0-9]+}}, [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
 define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
 main_body:
   %data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
@@ -19,7 +20,7 @@ main_body:
 ; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
 
 ; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
-; GFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xy v[[FULL:[0-9]+]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
 ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]]
 define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
 main_body:
@@ -34,8 +35,8 @@ main_body:
 ; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
 
 ; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
-; GFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
-; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
+; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; PACKED: v_mov_b{{16|32}}_e32 v{{[0-9]+(\.(l|h))?}}, v[[HI]]{{(\.(l|h))?}}
 define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
 main_body:
   %data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
@@ -49,7 +50,7 @@ main_body:
 ; PREGFX10-UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]]
 
 ; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
-; GFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
+; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], [[ZEROREG]], s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] idxen
 ; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]]
 define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
 main_body:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
index 8dc07bf434e0..a2fb4fdb88b5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9
 ; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
 declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
index 8595bf1296de..8bc53c952b35 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX9
 ; RUN: llc -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
 declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 40bb45de25f7..224a7bb35c27 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
 
 ; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
 ; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s
@@ -10,13 +12,15 @@
 ; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
 ; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
 ; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s
 
 ; GCN-LABEL: {{^}}fold_wavefrontsize:
 ; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
 
 ; W32:       v_mov_b32_e32 [[V:v[0-9]+]], 32
 ; W64:       v_mov_b32_e32 [[V:v[0-9]+]], 64
-; GCN:       store_dword v{{.+}}, [[V]]
+; GCN:       store_{{dword|b32}} v{{.+}}, [[V]]
 
 ; OPT-W32:   store i32 32, i32 addrspace(1)* %arg, align 4
 ; OPT-W64:   store i32 64, i32 addrspace(1)* %arg, align 4
@@ -37,7 +41,7 @@ bb:
 ; W32:       v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}}
 ; W64:       v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}}
 ; GCN-NOT:   cndmask
-; GCN:       store_dword v{{.+}}, [[V]]
+; GCN:       store_{{dword|b32}} v{{.+}}, [[V]]
 
 ; OPT-W32:   store i32 1, i32 addrspace(1)* %arg, align 4
 ; OPT-W64:   store i32 2, i32 addrspace(1)* %arg, align 4

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index 0001397123de..f41a184f3179 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -5,6 +5,7 @@
 ; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
 ; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,UNPACKED %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -18,7 +19,7 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0
 ; CO-V2: enable_vgpr_workitem_id = 0
 
 ; ALL-NOT: v0
-; ALL: {{buffer|flat|global}}_store_dword {{.*}}v0
+; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
 
 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0
 define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
@@ -37,7 +38,7 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
 ; CO-V2: {{buffer|flat}}_store_dword {{.*}}v1
 
 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
-; PACKED-TID: {{buffer|flat|global}}_store_dword {{.*}}[[ID]]
+; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1
 define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
   %id = call i32 @llvm.amdgcn.workitem.id.y()
@@ -55,7 +56,7 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
 ; CO-V2: {{buffer|flat}}_store_dword {{.*}}v2
 
 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
-; PACKED-TID: {{buffer|flat|global}}_store_dword {{.*}}[[ID]]
+; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2
 define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
   %id = call i32 @llvm.amdgcn.workitem.id.z()
@@ -73,8 +74,8 @@ define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
 ; PACKED: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ff, v0
 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
 
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work_group_size !0 {
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
   %id.y = call i32 @llvm.amdgcn.workitem.id.y()
@@ -89,14 +90,14 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work
 ; CO-V2: enable_vgpr_workitem_id = 1
 
 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 
 ; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
 
 ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 10
 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
 
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work_group_size !1 {
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
   %id.y = call i32 @llvm.amdgcn.workitem.id.y()
@@ -111,8 +112,8 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work
 ; CO-V2: enable_vgpr_workitem_id = 2
 
 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
-; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
+; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
 
 ; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
 

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
index ac1f92ef93f9..bf75801a21d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,CIGFX9 %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,CIGFX9 %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
 
 declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
 

diff  --git a/llvm/test/CodeGen/AMDGPU/mad.u16.ll b/llvm/test/CodeGen/AMDGPU/mad.u16.ll
index 7d165b09b3f6..42f723d62c5a 100644
--- a/llvm/test/CodeGen/AMDGPU/mad.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad.u16.ll
@@ -1,17 +1,18 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
 
 ; FIXME: GFX9 should be producing v_mad_u16 instead of v_mad_legacy_u16.
 
 ; GCN-LABEL: {{^}}mad_u16
-; GCN: {{flat|global}}_load_ushort v[[A:[0-9]+]]
-; GCN: {{flat|global}}_load_ushort v[[B:[0-9]+]]
-; GCN: {{flat|global}}_load_ushort v[[C:[0-9]+]]
+; GCN: {{flat|global}}_load_{{ushort|u16}} v[[A:[0-9]+]]
+; GCN: {{flat|global}}_load_{{ushort|u16}} v[[B:[0-9]+]]
+; GCN: {{flat|global}}_load_{{ushort|u16}} v[[C:[0-9]+]]
 ; GFX8: v_mad_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
 ; GFX9: v_mad_legacy_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
 ; GFX10: v_mad_u16 v[[R:[0-9]+]], v[[A]], v[[B]], v[[C]]
-; GCN: {{flat|global}}_store_short v{{.+}}, v[[R]]
+; GCN: {{flat|global}}_store_{{short|b16}} v{{.+}}, v[[R]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @mad_u16(
     i16 addrspace(1)* %r,

diff  --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index d2cc65f922e0..b8bb3a5a242a 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W64
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W32
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W64
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W32
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W64
 ; RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info -o - %s | FileCheck %s --check-prefix=W64-O0
 
 ; Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions.
@@ -79,8 +81,8 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W64: s_cbranch_execnz [[LOOPBB1]]
 
 ; W64: s_mov_b64 exec, [[SAVEEXEC]]
-; W64-DAG: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off
-; W64-DAG: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off
+; W64-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off
+; W64-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off
 
 
 ; W32-LABEL: mubuf_vgpr_adjacent_in_block
@@ -117,8 +119,8 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
 ; W32: s_cbranch_execnz [[LOOPBB1]]
 
 ; W32: s_mov_b32 exec_lo, [[SAVEEXEC]]
-; W32-DAG: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off
-; W32-DAG: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off
+; W32-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES0]], off
+; W32-DAG: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES1]], off
 
 define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, float addrspace(1)* %out0, float addrspace(1)* %out1) #0 {
 entry:
@@ -171,7 +173,7 @@ entry:
 ; W64: s_mov_b64 exec, [[SAVEEXEC]]
 
 ; W64: [[TERMBB]]:
-; W64: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]], off
+; W64: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES]], off
 
 
 ; W32-LABEL: mubuf_vgpr_outside_entry
@@ -215,7 +217,7 @@ entry:
 ; W32: s_mov_b32 exec_lo, [[SAVEEXEC]]
 
 ; W32: [[TERMBB]]:
-; W32: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]], off
+; W32: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[RES]], off
 
 
 ; Confirm spills do not occur between the XOR and branch that terminate the

diff  --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll
index def945fc3b4a..5678e90dc3d9 100644
--- a/llvm/test/CodeGen/AMDGPU/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.ll
@@ -1,7 +1,8 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,FUNC %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9_10 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9_10 %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9PLUS %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9PLUS %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=FUNC,GFX9PLUS %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=EG,FUNC %s
 
 ; mul24 and mad24 are affected
@@ -144,11 +145,11 @@ define amdgpu_kernel void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %
 ; crash with a 'failed to select' error.
 
 ; FUNC-LABEL: {{^}}s_mul_i64:
-; GFX9_10-DAG: s_mul_i32
-; GFX9_10-DAG: s_mul_hi_u32
-; GFX9_10-DAG: s_mul_i32
-; GFX9_10-DAG: s_mul_i32
-; GFX9_10: s_endpgm
+; GFX9PLUS-DAG: s_mul_i32
+; GFX9PLUS-DAG: s_mul_hi_u32
+; GFX9PLUS-DAG: s_mul_i32
+; GFX9PLUS-DAG: s_mul_i32
+; GFX9PLUS: s_endpgm
 define amdgpu_kernel void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %mul = mul i64 %a, %b
   store i64 %mul, i64 addrspace(1)* %out, align 8

diff  --git a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
index 1d0861feec77..713006a26272 100644
--- a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9_10,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_1:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_0_1(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -15,7 +16,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_0:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_1_0(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -28,7 +29,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_1_1:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 1.0 op_sel_hi:[1,0]{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_1_1(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -41,7 +42,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_m1:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel:[0,1] op_sel_hi:[1,0]{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_0_m1(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -54,7 +55,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_0:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_m1_0(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -67,7 +68,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_m1_m1:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, -1.0 op_sel_hi:[1,0]{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_m1_m1(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -80,7 +81,7 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_0:
-; GFX9_10: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
+; GCN: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, 0{{$}}
 define amdgpu_kernel void @test_pk_max_f16_literal_0_0(<2 x half> addrspace(1)* nocapture %arg) {
 bb:
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()

diff  --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index 789aa514695e..0adcc5427282 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -2,9 +2,10 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX900 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX906 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
 ; GCN-LABEL: {{^}}shl_i16:
-; GCN: v_lshlrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_lshlrev_b16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @shl_i16(i16 %x, i16 %y) {
   %res = shl i16 %x, %y
@@ -12,7 +13,7 @@ define i16 @shl_i16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}lshr_i16:
-; GCN: v_lshrrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_lshrrev_b16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @lshr_i16(i16 %x, i16 %y) {
   %res = lshr i16 %x, %y
@@ -20,7 +21,7 @@ define i16 @lshr_i16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}ashr_i16:
-; GCN: v_ashrrev_i16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_ashrrev_i16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @ashr_i16(i16 %x, i16 %y) {
   %res = ashr i16 %x, %y
@@ -28,7 +29,7 @@ define i16 @ashr_i16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}add_u16:
-; GCN: v_add_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_add_{{(nc_)*}}u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @add_u16(i16 %x, i16 %y) {
   %res = add i16 %x, %y
@@ -36,7 +37,7 @@ define i16 @add_u16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}sub_u16:
-; GCN: v_sub_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_sub_{{(nc_)*}}u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @sub_u16(i16 %x, i16 %y) {
   %res = sub i16 %x, %y
@@ -44,7 +45,7 @@ define i16 @sub_u16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}mul_lo_u16:
-; GCN: v_mul_lo_u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_mul_lo_u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @mul_lo_u16(i16 %x, i16 %y) {
   %res = mul i16 %x, %y
@@ -52,7 +53,7 @@ define i16 @mul_lo_u16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}min_u16:
-; GCN: v_min_u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_min_u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @min_u16(i16 %x, i16 %y) {
   %cmp = icmp ule i16 %x, %y
@@ -61,7 +62,7 @@ define i16 @min_u16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}min_i16:
-; GCN: v_min_i16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_min_i16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @min_i16(i16 %x, i16 %y) {
   %cmp = icmp sle i16 %x, %y
@@ -70,7 +71,7 @@ define i16 @min_i16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}max_u16:
-; GCN: v_max_u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_max_u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @max_u16(i16 %x, i16 %y) {
   %cmp = icmp uge i16 %x, %y
@@ -79,7 +80,7 @@ define i16 @max_u16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}max_i16:
-; GCN: v_max_i16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_max_i16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GCN-NEXT: s_setpc_b64
 define i16 @max_i16(i16 %x, i16 %y) {
   %cmp = icmp sge i16 %x, %y
@@ -88,7 +89,7 @@ define i16 @max_i16(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}shl_i16_zext_i32:
-; GCN: v_lshlrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_lshlrev_b16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
@@ -98,7 +99,7 @@ define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}lshr_i16_zext_i32:
-; GCN: v_lshrrev_b16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_lshrrev_b16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
@@ -108,7 +109,7 @@ define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}ashr_i16_zext_i32:
-; GCN: v_ashrrev_i16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_ashrrev_i16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
@@ -118,7 +119,7 @@ define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}add_u16_zext_i32:
-; GCN: v_add_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_add_{{(nc_)*}}u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
@@ -128,7 +129,7 @@ define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}sub_u16_zext_i32:
-; GCN: v_sub_{{(nc_)*}}u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_sub_{{(nc_)*}}u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
@@ -138,7 +139,7 @@ define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}mul_lo_u16_zext_i32:
-; GCN: v_mul_lo_u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_mul_lo_u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
@@ -148,7 +149,7 @@ define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}min_u16_zext_i32:
-; GCN: v_min_u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_min_u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
@@ -159,7 +160,7 @@ define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}min_i16_zext_i32:
-; GCN: v_min_i16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_min_i16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
@@ -170,7 +171,7 @@ define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}max_u16_zext_i32:
-; GCN: v_max_u16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_max_u16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
@@ -181,7 +182,7 @@ define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
 }
 
 ; GCN-LABEL: {{^}}max_i16_zext_i32:
-; GCN: v_max_i16{{[_e32]*}} [[OP:v[0-9]+]],
+; GCN: v_max_i16{{(_e32|_e64)?}} [[OP:v[0-9]+]],
 ; GFX10-NEXT: v_and_b32_e32 v{{[0-9]+}}, 0xffff, [[OP]]
 ; GCN-NEXT: s_setpc_b64
 define i32 @max_i16_zext_i32(i16 %x, i16 %y) {

diff  --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index 44dc9aaf03d1..3867e6d65728 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare half @llvm.fabs.f16(half)
@@ -52,7 +53,7 @@ define amdgpu_kernel void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f)
 ; (select (cmp (sgprX, constant)), constant, sgprZ)
 
 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprZ_f32:
-; GCN: s_load_dwordx2 s[[[X:[0-9]+]]:[[Z:[0-9]+]]], s[0:1], {{0x4c|0x13}}
+; GCN: s_load_{{dwordx2|b64}} s[[[X:[0-9]+]]:[[Z:[0-9]+]]], s[0:1], {{0x4c|0x13}}
 
 ; SIVI-DAG:  v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0
 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0
@@ -70,7 +71,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprZ_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_sgprX_f32:
-; GCN: s_load_dword [[X:s[0-9]+]]
+; GCN: s_load_{{dword|b32}} [[X:s[0-9]+]]
 ; SIVI-DAG:  v_cmp_nlg_f32_e64 [[CC:vcc]], [[X]], 0
 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], [[X]], 0
 ; SIVI-DAG:  v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
@@ -87,7 +88,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_sgprX_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprZ_f32:
-; GCN-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Z:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
+; GCN-DAG: s_load_{{dwordx2|b64}} s[[[X:[0-9]+]]:[[Z:[0-9]+]]], s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}}
 ; SIVI-DAG:  v_cmp_nlg_f32_e64 [[CC:vcc]], s[[X]], 0
 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], s[[X]], 0
 ; SIVI-DAG:  v_mov_b32_e32 [[VZ:v[0-9]+]], s[[Z]]
@@ -104,7 +105,7 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprZ_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_sgprX_f32:
-; GCN: s_load_dword [[X:s[0-9]+]]
+; GCN: s_load_{{dword|b32}} [[X:s[0-9]+]]
 ; SIVI-DAG:  v_cmp_nlg_f32_e64 [[CC:vcc]], [[X]], 0
 ; GFX10-DAG: v_cmp_nlg_f32_e64 [[CC:s\[[0-9:]+\]]], [[X]], 0
 ; SIVI-DAG:  v_mov_b32_e32 [[VX:v[0-9]+]], [[X]]
@@ -121,8 +122,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_sgprX_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k0_vgprZ_f32:
-; GCN-DAG: s_load_dword [[X:s[0-9]+]]
-; GCN-DAG: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
+; GCN-DAG: s_load_{{dword|b32}} [[X:s[0-9]+]]
+; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]]
 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 0, [[Z]], [[COND]]
 define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
@@ -138,8 +139,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k0_vgprZ_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_sgprX_k0_select_k1_vgprZ_f32:
-; GCN-DAG: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
-; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]]
+; GCN-DAG: s_load_{{dword|b32}} [[X:s[0-9]+]]
 ; GCN-DAG: v_cmp_nlg_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], [[X]], 0
 ; GCN: v_cndmask_b32_e{{32|64}} v{{[0-9]+}}, 1.0, [[Z]], [[COND]]
 define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float %x, float addrspace(1)* %z.ptr) #0 {
@@ -155,8 +156,8 @@ define amdgpu_kernel void @fcmp_sgprX_k0_select_k1_vgprZ_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_sgprZ_f32:
-; GCN-DAG: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN-DAG: s_load_dword [[Z:s[0-9]+]]
+; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN-DAG: s_load_{{dword|b32}} [[Z:s[0-9]+]]
 ; GCN-DAG: v_cmp_ngt_f32_e32 vcc, 0, [[X]]
 ; SIVI-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
 ; SIVI:     v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[VZ]], vcc
@@ -174,8 +175,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_sgprZ_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]]
 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[Z]], vcc
 define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)* %out, float addrspace(1)* %x.ptr, float addrspace(1)* %z.ptr) #0 {
@@ -193,8 +194,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_f32(float addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]]
 ; GCN: v_cmp_lt_i32_e32 vcc, -1, [[X]]
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 2, [[Z]], vcc
 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i32 addrspace(1)* %z.ptr) #0 {
@@ -212,8 +213,8 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i32(i32 addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i64:
-; GCN: {{buffer|flat|global}}_load_dwordx2 v[[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]]
-; GCN-DAG: {{buffer|flat|global}}_load_dwordx2 v[[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]]
+; GCN: {{buffer|flat|global}}_load_{{dwordx2|b64}} v[[[X_LO:[0-9]+]]:[[X_HI:[0-9]+]]]
+; GCN-DAG: {{buffer|flat|global}}_load_{{dwordx2|b64}} v[[[Z_LO:[0-9]+]]:[[Z_HI:[0-9]+]]]
 ; GCN-DAG: v_cmp_lt_i64_e32 vcc, -1, v[[[X_LO]]:[[X_HI]]]
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v[[Z_HI]], vcc
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 2, v[[Z_LO]], vcc
@@ -232,8 +233,8 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i64(i64 addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_vgprZ_k1_v4f32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dwordx4
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dwordx4|b128}}
 
 ; GCN: v_cmp_nge_f32_e32 vcc, 4.0, [[X]]
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
@@ -255,8 +256,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_vgprZ_k1_v4f32(<4 x float> addrs
 }
 
 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_select_k1_vgprZ_v4f32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dwordx4
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dwordx4|b128}}
 
 ; GCN: v_cmp_ge_f32_e32 vcc, 4.0, [[X]]
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
@@ -281,8 +282,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_select_k1_vgprZ_v4f32(<4 x float> addrs
 ; multiple uses.
 
 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_v4f32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dwordx4
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dwordx4|b128}}
 
 ; GCN: v_cmp_le_f32_e32 vcc, 4.0, [[X]]
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
@@ -304,14 +305,14 @@ define amdgpu_kernel void @fcmp_k0_vgprX_select_k1_vgprZ_v4f32(<4 x float> addrs
 }
 
 ; GCN-LABEL: {{^}}icmp_vgprX_k0_select_k1_vgprZ_i1:
-; GCN: load_dword
-; GCN: load_ubyte
+; GCN: load_{{dword|b32}}
+; GCN: load_{{ubyte|u8}}
 ; GCN-DAG: v_cmp_gt_i32_e32 vcc, 0, v
 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 1,
 ; GCN-DAG: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, 1, v
 ; GCN-DAG: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, s{{\[[0-9]+:[0-9]+\]}}
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s
-; GCN: store_byte
+; GCN: store_{{byte|b8}}
 define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %x.ptr, i1 addrspace(1)* %z.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %tid.ext = sext i32 %tid to i64
@@ -329,8 +330,8 @@ define amdgpu_kernel void @icmp_vgprX_k0_select_k1_vgprZ_i1(i1 addrspace(1)* %ou
 ; Different types compared vs. selected
 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selectf64_k1_vgprZ_f32:
 ; SIVI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3ff00000
-; GCN-DAG: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN-DAG: {{buffer|flat|global}}_load_dwordx2
+; GCN-DAG: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN-DAG: {{buffer|flat|global}}_load_{{dwordx2|b64}}
 
 ; GCN: v_cmp_le_f32_e32 vcc, 0, [[X]]
 ; SIVI-DAG:  v_cndmask_b32_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}, vcc
@@ -352,8 +353,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selectf64_k1_vgprZ_f32(double addrspace
 
 ; Different types compared vs. selected
 ; GCN-LABEL: {{^}}fcmp_vgprX_k0_selecti64_k1_vgprZ_f32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dwordx2
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dwordx2|b64}}
 
 ; GCN: v_cmp_nlg_f32_e32 vcc, 0, [[X]]
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 3, v{{[0-9]+}}, vcc
@@ -374,8 +375,8 @@ define amdgpu_kernel void @fcmp_vgprX_k0_selecti64_k1_vgprZ_f32(i64 addrspace(1)
 
 ; Different types compared vs. selected
 ; GCN-LABEL: {{^}}icmp_vgprX_k0_selectf32_k1_vgprZ_i32:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[Z:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[Z:v[0-9]+]]
 
 ; GCN: v_cmp_gt_u32_e32 vcc, 2, [[X]]
 ; GCN-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 4.0, [[Z]], vcc
@@ -396,7 +397,7 @@ define amdgpu_kernel void @icmp_vgprX_k0_selectf32_k1_vgprZ_i32(float addrspace(
 ; FIXME: Should be able to handle multiple uses
 
 ; GCN-LABEL: {{^}}fcmp_k0_vgprX_select_k1_vgprZ_f32_cond_use_x2:
-; GCN: {{buffer|flat|global}}_load_dword [[X:v[0-9]+]]
+; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[X:v[0-9]+]]
 
 ; GCN: v_cmp_nle_f32_e32 vcc, 4.0, [[X]]
 ; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, -1.0, vcc


        


More information about the llvm-commits mailing list