[llvm] a59c3eb - [AMDGPU] Add GFX11 coverage to shared sdag/gisel tests

Fri Jul 8 01:43:09 PDT 2022

Author: Jay Foad
Date: 2022-07-08T09:40:20+01:00
New Revision: a59c3eb2f37d4c8d0a8e48a698cec9314c7f4853

URL: https://github.com/llvm/llvm-project/commit/a59c3eb2f37d4c8d0a8e48a698cec9314c7f4853
DIFF: https://github.com/llvm/llvm-project/commit/a59c3eb2f37d4c8d0a8e48a698cec9314c7f4853.diff

LOG: [AMDGPU] Add GFX11 coverage to shared sdag/gisel tests

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
    llvm/test/CodeGen/AMDGPU/readcyclecounter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
index 17293926f035..449a8ab04ba0 100644

--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
@@ -3,6 +3,7 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.barrier.ll
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll
 
 ; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
 ; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
index 942c991bbd46..3dceb31d9272 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
@@ -3,3 +3,4 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
index 6c807def1b35..ada126725371 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
@@ -3,3 +3,4 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
index 9a287359d4db..ba67a6e6365e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -1 +1,2 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
index 22826a7e9937..9a6bebd5a31c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll
@@ -1,2 +1,3 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -global-isel -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
index 16bf1a6b535c..d6d20abcd8aa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll
@@ -1,3 +1,5 @@
 ; Runs original SDAG test with -global-isel
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1064 %S/../llvm.amdgcn.init.exec.wave32.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1064 %S/../llvm.amdgcn.init.exec.wave32.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
index 07be54d35779..e644b907824a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
@@ -1 +1,2 @@
-; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10 %S/../llvm.amdgcn.permlane.ll
+; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %S/../llvm.amdgcn.permlane.ll
+; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %S/../llvm.amdgcn.permlane.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll
index a0913eeb9f96..715f3820787f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll
@@ -1,3 +1,4 @@
 ; Runs original SDAG test with -global-isel
 ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE64  %S/../llvm.amdgcn.wqm.vote.ll
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %S/../llvm.amdgcn.wqm.vote.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %S/../llvm.amdgcn.wqm.vote.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll
index a1d5312b259c..8d9ab9cada75 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll
@@ -2,3 +2,4 @@
 ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %S/../readcyclecounter.ll
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=GCN %S/../readcyclecounter.ll
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %S/../readcyclecounter.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %S/../readcyclecounter.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
index 9faacc8ebde7..8ca8148e8820 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
@@ -3,6 +3,7 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s
 
 ; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s
@@ -11,7 +12,7 @@
 
 ; Minimum offset
 ; GCN-LABEL: {{^}}gws_barrier_offset0:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
 ; NOLOOP: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_barrier v0 gds{{$}}
@@ -39,7 +40,7 @@ define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
 
 ; Maximum offset
 ; GCN-LABEL: {{^}}gws_barrier_offset63:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_barrier v0 offset:63 gds{{$}}
@@ -50,7 +51,7 @@ define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
 
 ; FIXME: Should be able to shift directly into m0
 ; GCN-LABEL: {{^}}gws_barrier_sgpr_offset:
-; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
+; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
@@ -67,7 +68,7 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
 
 ; Variable offset in SGPR with constant add
 ; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1:
-; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
+; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
@@ -83,7 +84,7 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.ba
 }
 
 ; GCN-LABEL: {{^}}gws_barrier_vgpr_offset:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
@@ -101,7 +102,7 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
 
 ; Variable offset in VGPR with constant add
 ; GCN-LABEL: {{^}}gws_barrier_vgpr_offset_add:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
@@ -166,7 +167,7 @@ define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, i32 addrspace(1)* %
 ; NOLOOP: s_mov_b32 m0, 0{{$}}
 ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds
 ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; NOLOOP: load_dword
+; NOLOOP: load_{{dword|b32}}
 define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, i32 addrspace(1)* %ptr) #0 {
   call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
   %load = load volatile i32, i32 addrspace(1)* %ptr
@@ -176,7 +177,7 @@ define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, i32 addrspace(1)* %p
 ; Does not imply memory fence on its own
 ; GCN-LABEL: {{^}}gws_barrier_fence_before:
 ; NOLOOP: s_mov_b32 m0, 0{{$}}
-; NOLOOP: store_dword
+; NOLOOP: store_{{dword|b32}}
 ; NOLOOP: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds
 ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,7 +194,8 @@ define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, i32 addrspace(1)*
 ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds
 ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; NOLOOP-NEXT: load_dword
+; NOLOOP-NEXT: load_{{dword|b32}}
+
 define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, i32 addrspace(1)* %ptr) #0 {
   call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
   fence release
@@ -220,6 +222,7 @@ define amdgpu_kernel void @gws_init_barrier(i32 %val) #0 {
 ; NOLOOP: ds_gws_init v0 offset:7 gds
 ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+
 ; NOLOOP-NEXT: ds_gws_barrier v0 offset:7 gds
 ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 define amdgpu_kernel void @gws_init_fence_barrier(i32 %val) #0 {

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
index 9d271075eba5..f656af44746f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
@@ -3,10 +3,11 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
 
 ; Minimum offset
 ; GCN-LABEL: {{^}}gws_init_offset0:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; GCN-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; GCN-DAG: s_mov_b32 m0, 0{{$}}
 ; GCN: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_init v0 gds{{$}}
@@ -25,7 +26,7 @@ define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 {
 
 ; Maximum offset
 ; GCN-LABEL: {{^}}gws_init_offset63:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_init v0 offset:63 gds{{$}}
@@ -46,7 +47,7 @@ define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
 
 ; FIXME: Should be able to shift directly into m0
 ; GCN-LABEL: {{^}}gws_init_sgpr_offset:
-; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
+; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
@@ -62,7 +63,7 @@ define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
 
 ; Variable offset in SGPR with constant add
 ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
-; NOLOOP-DAG: s_load_dwordx2 s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
+; NOLOOP-DAG: s_load_{{dwordx2|b64}} s[[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]]
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
 ; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
@@ -78,7 +79,7 @@ define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base)
 }
 
 ; GCN-LABEL: {{^}}gws_init_vgpr_offset:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
@@ -96,7 +97,7 @@ define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
 
 ; Variable offset in VGPR with constant add
 ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
 
 ; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
index ffa65acca97d..18f187a0bb71 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
@@ -3,9 +3,10 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
 
 ; GCN-LABEL: {{^}}gws_sema_br_offset0:
-; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
+; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
 ; NOLOOP: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_sema_br v0 gds{{$}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
index e95f49a5fa8e..4296455a018d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -1,7 +1,8 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
 
 ; GCN-LABEL: {{^}}ds_ordered_add:
-; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: v_{{(dual_)?}}mov_b32{{(_e32)?}} v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
@@ -11,7 +12,7 @@ define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addr
 }
 
 ; GCN-LABEL: {{^}}ds_ordered_add_4dw:
-; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN-DAG: v_{{(dual_)?}}mov_b32{{(_e32)?}} v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:49924 gds
 define amdgpu_kernel void @ds_ordered_add_4dw(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
index 27adb5c3d026..aaf73ea8d6c9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}full_mask:
 ; GCN: s_mov_b64 exec, -1
@@ -152,9 +153,9 @@ main_body:
 ; GCN-LABEL: {{^}}init_exec_input_before_frame_materialize_nonentry:
 ; GCN-NOT: {{^}}v_
 ; GCN: %endif
-; GCN: s_bfe_u32 s3, s2, 0x70008
-; GCN-NEXT: s_bfm_b64 exec, s3, 0
-; GCN-NEXT: s_cmp_eq_u32 s3, 64
+; GCN: s_bfe_u32 [[S:s[0-9]+]], s2, 0x70008
+; GCN-NEXT: s_bfm_b64 exec, [[S]], 0
+; GCN-NEXT: s_cmp_eq_u32 [[S]], 64
 ; GCN-NEXT: s_cmov_b64 exec, -1
 ; GCN: v_mov
 ; GCN: v_add

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
index c65c549b7dc5..1355274ae9ea 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s
 
 ; GCN-LABEL: {{^}}test_init_exec:
 ; GFX1032: s_mov_b32 exec_lo, 0x12345

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
index e6b51496b10d..862dfe7154fd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -1,4 +1,5 @@
-; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
+; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
 
 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1
 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1
@@ -6,8 +7,8 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 declare i32 @llvm.amdgcn.workitem.id.y()
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vss:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -15,8 +16,8 @@ define amdgpu_kernel void @v_permlane16_b32_vss(i32 addrspace(1)* %out, i32 %src
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vii:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -24,11 +25,11 @@ define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vll:
-; FIXME-GFX10: It is allowed to have both immediates as literals
-; GFX10-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
+; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
+; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -38,7 +39,11 @@ define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src
 ; GCN-LABEL: {{^}}v_permlane16_b32_vvv:
 ; GFX10-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
 ; GFX10-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX11-DAG: v_and_b32_e32 [[VSRC1:v[0-9]+]],
+; GFX11-DAG: v_bfe_u32 [[VSRC2:v[0-9]+]],
+; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]]
+; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]]
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %tidy = call i32 @llvm.amdgcn.workitem.id.y()
@@ -48,10 +53,10 @@ define amdgpu_kernel void @v_permlane16_b32_vvv(i32 addrspace(1)* %out, i32 %src
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vvs:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0)
@@ -60,10 +65,10 @@ define amdgpu_kernel void @v_permlane16_b32_vvs(i32 addrspace(1)* %out, i32 %src
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vsv:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
   %tidy = call i32 @llvm.amdgcn.workitem.id.y()
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0)
@@ -72,8 +77,8 @@ define amdgpu_kernel void @v_permlane16_b32_vsv(i32 addrspace(1)* %out, i32 %src
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,0]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -81,8 +86,8 @@ define amdgpu_kernel void @v_permlane16_b32_vss_fi(i32 addrspace(1)* %out, i32 %
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vss_bc:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1)
   store i32 %v, i32 addrspace(1)* %out
@@ -90,8 +95,8 @@ define amdgpu_kernel void @v_permlane16_b32_vss_bc(i32 addrspace(1)* %out, i32 %
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi_bc:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,1]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1)
   store i32 %v, i32 addrspace(1)* %out
@@ -99,8 +104,8 @@ define amdgpu_kernel void @v_permlane16_b32_vss_fi_bc(i32 addrspace(1)* %out, i3
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vss:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -108,8 +113,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vss(i32 addrspace(1)* %out, i32 %sr
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vii:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -117,11 +122,11 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vll:
-; FIXME-GFX10: It is allowed to have both immediates as literals
-; GFX10-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
-; GFX10-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
+; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
+; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -131,7 +136,11 @@ define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %sr
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vvv:
 ; GFX10-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
 ; GFX10-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
+; GFX11-DAG: v_and_b32_e32 [[VSRC1:v[0-9]+]],
+; GFX11-DAG: v_bfe_u32 [[VSRC2:v[0-9]+]],
+; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]]
+; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]]
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %tidy = call i32 @llvm.amdgcn.workitem.id.y()
@@ -141,10 +150,10 @@ define amdgpu_kernel void @v_permlanex16_b32_vvv(i32 addrspace(1)* %out, i32 %sr
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vvs:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0)
@@ -153,10 +162,10 @@ define amdgpu_kernel void @v_permlanex16_b32_vvs(i32 addrspace(1)* %out, i32 %sr
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vsv:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
   %tidy = call i32 @llvm.amdgcn.workitem.id.y()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0)
@@ -165,8 +174,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vsv(i32 addrspace(1)* %out, i32 %sr
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,0]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0)
   store i32 %v, i32 addrspace(1)* %out
@@ -174,8 +183,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vss_fi(i32 addrspace(1)* %out, i32
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vss_bc:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1)
   store i32 %v, i32 addrspace(1)* %out
@@ -183,8 +192,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vss_bc(i32 addrspace(1)* %out, i32
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi_bc:
-; GFX10-NOT: v_readfirstlane_b32
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,1]{{$}}
+; GFX10PLUS-NOT: v_readfirstlane_b32
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1)
   store i32 %v, i32 addrspace(1)* %out
@@ -192,7 +201,7 @@ define amdgpu_kernel void @v_permlanex16_b32_vss_fi_bc(i32 addrspace(1)* %out, i
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_tid_tid:
-; GFX10: v_permlane16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS: v_permlane16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlane16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
@@ -201,7 +210,7 @@ define amdgpu_kernel void @v_permlane16_b32_tid_tid(i32 addrspace(1)* %out, i32
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_undef_tid:
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlane16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
@@ -210,8 +219,8 @@ define amdgpu_kernel void @v_permlane16_b32_undef_tid(i32 addrspace(1)* %out, i3
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid:
-; GFX10: v_mov_b32_e32 [[OLD:v[0-9]+]], 0x3039
-; GFX10: v_permlane16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039
+; GFX10PLUS: v_permlane16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlane16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
@@ -220,8 +229,8 @@ define amdgpu_kernel void @v_permlane16_b32_i_tid(i32 addrspace(1)* %out, i32 %s
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi:
-; GFX10-NOT: 0x3039
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,0]{{$}}
+; GFX10PLUS-NOT: 0x3039
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0)
@@ -230,8 +239,8 @@ define amdgpu_kernel void @v_permlane16_b32_i_tid_fi(i32 addrspace(1)* %out, i32
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_bc:
-; GFX10-NOT: 0x3039
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1]{{$}}
+; GFX10PLUS-NOT: 0x3039
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1)
@@ -240,8 +249,8 @@ define amdgpu_kernel void @v_permlane16_b32_i_tid_bc(i32 addrspace(1)* %out, i32
 }
 
 ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi_bc:
-; GFX10-NOT: 0x3039
-; GFX10: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,1]{{$}}
+; GFX10PLUS-NOT: 0x3039
+; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlane16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1)
@@ -250,7 +259,7 @@ define amdgpu_kernel void @v_permlane16_b32_i_tid_fi_bc(i32 addrspace(1)* %out,
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_tid_tid:
-; GFX10: v_permlanex16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS: v_permlanex16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
@@ -259,7 +268,7 @@ define amdgpu_kernel void @v_permlanex16_b32_tid_tid(i32 addrspace(1)* %out, i32
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_undef_tid:
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
@@ -268,8 +277,8 @@ define amdgpu_kernel void @v_permlanex16_b32_undef_tid(i32 addrspace(1)* %out, i
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid:
-; GFX10: v_mov_b32_e32 [[OLD:v[0-9]+]], 0x3039
-; GFX10: v_permlanex16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
+; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039
+; GFX10PLUS: v_permlanex16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
@@ -278,8 +287,8 @@ define amdgpu_kernel void @v_permlanex16_b32_i_tid(i32 addrspace(1)* %out, i32 %
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi:
-; GFX10-NOT: 0x3039
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,0]{{$}}
+; GFX10PLUS-NOT: 0x3039
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0)
@@ -288,8 +297,8 @@ define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi(i32 addrspace(1)* %out, i3
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_bc:
-; GFX10-NOT: 0x3039
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1]{{$}}
+; GFX10PLUS-NOT: 0x3039
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1)
@@ -298,8 +307,8 @@ define amdgpu_kernel void @v_permlanex16_b32_i_tid_bc(i32 addrspace(1)* %out, i3
 }
 
 ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi_bc:
-; GFX10-NOT: 0x3039
-; GFX10: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[1,1]{{$}}
+; GFX10PLUS-NOT: 0x3039
+; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
 define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
index 2cc4a1bdb122..768165368fb0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE64  %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %s
 
 ;CHECK-LABEL: {{^}}ret:
 ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
@@ -45,7 +46,7 @@ main_body:
 
 ;WAVE32: s_wqm_b32 [[WQM:[^,]+]], [[CMP]]
 ;WAVE32: s_xor_b32 [[KILL:[^,]+]], [[WQM]], exec
-;WAVE32: s_andn2_b32 [[MASK:[^,]+]], [[EXEC:[^,]+]], [[KILL]]
+;WAVE32: s_and{{n2|_not1}}_b32 [[MASK:[^,]+]], [[EXEC:[^,]+]], [[KILL]]
 ;WAVE32: s_and_b32 exec_lo, exec_lo, [[MASK]]
 
 ;CHECK: s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
index f9de42f47b65..bd68d30fa544 100644
--- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
@@ -2,12 +2,13 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s
 
 declare i64 @llvm.readcyclecounter() #0
 
 ; GCN-LABEL: {{^}}test_readcyclecounter:
 ; MEMTIME-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
-; GCN-DAG:     s_load_dwordx2
+; GCN-DAG:     s_load_{{dwordx2|b64}}
 ; GCN-DAG:     lgkmcnt
 ; MEMTIME:     store_dwordx2
 ; SIVI-NOT:    lgkmcnt
@@ -19,10 +20,10 @@ declare i64 @llvm.readcyclecounter() #0
 ; GETREG-SDAG-DAG:  v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
 ; GETREG-DAG:  s_getreg_b32 [[CNT1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES, 0, 20)
 ; GETREG-DAG:  v_mov_b32_e32 v[[VCNT1:[0-9]+]], [[CNT1]]
-; GETREG:      global_store_dwordx2 v{{.+}}, v[[[VCNT1]]:[[ZERO]]]
+; GETREG:      global_store_{{dwordx2|b64}} v{{.+}}, v[[[VCNT1]]:[[ZERO]]]
 ; GETREG:      s_getreg_b32 [[CNT2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES, 0, 20)
 ; GETREG:      v_mov_b32_e32 v[[VCNT2:[0-9]+]], [[CNT2]]
-; GETREG:      global_store_dwordx2 v{{.+}}, v[[[VCNT2]]:[[ZERO]]]
+; GETREG:      global_store_{{dwordx2|b64}} v{{.+}}, v[[[VCNT2]]:[[ZERO]]]
 
 define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
   %cycle0 = call i64 @llvm.readcyclecounter()
@@ -37,7 +38,7 @@ define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 {
 ;
 ; GCN-LABEL: {{^}}test_readcyclecounter_smem:
 ; MEMTIME-DAG: s_memtime
-; GCN-DAG:     s_load_dword
+; GCN-DAG:     s_load_{{dword|b32|b64}}
 ; GETREG-DAG:  s_getreg_b32 s1, hwreg(HW_REG_SHADER_CYCLES, 0, 20)
 define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(4)* inreg %in) #0 {
   %cycle0 = call i64 @llvm.readcyclecounter()