[llvm] eccdedd - [AMDGPU] Autogenerate icmp codegen test

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 23 23:37:59 PDT 2022


Author: Pierre van Houtryve
Date: 2022-10-24T06:37:50Z
New Revision: eccdedd6f787c8e80529cd998d30c478b03e5b43

URL: https://github.com/llvm/llvm-project/commit/eccdedd6f787c8e80529cd998d30c478b03e5b43
DIFF: https://github.com/llvm/llvm-project/commit/eccdedd6f787c8e80529cd998d30c478b03e5b43.diff

LOG: [AMDGPU] Autogenerate icmp codegen test

Switch to autogenerated tests so we can use the same test for GISel and DAGIsel.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D136446

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
index 327c744ad42d0..7c9d063bb7242 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -1,315 +1,1086 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
 
+declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
+declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
 
-; GCN-LABEL: {{^}}v_icmp_i32_eq:
-; GCN: v_cmp_eq_u32_e64
 define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_eq:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_eq_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_eq:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32:
-; GCN-NOT: v_cmp_eq_u32_e64
 define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) {
+; GCN-LABEL: v_icmp_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_ne:
-; GCN: v_cmp_ne_u32_e64
 define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_ne:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_ne:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ne_u32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_ugt:
-; GCN: v_cmp_gt_u32_e64
 define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_ugt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_gt_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_ugt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_gt_u32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_uge:
-; GCN: v_cmp_ge_u32_e64
 define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_uge:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_ge_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_uge:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ge_u32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_ult:
-; GCN: v_cmp_lt_u32_e64
 define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_ult:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_lt_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_ult:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_lt_u32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_ule:
-; GCN: v_cmp_le_u32_e64
 define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_ule:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_ule:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_le_u32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_sgt:
-; GCN: v_cmp_gt_i32_e64
 define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
+; GFX-LABEL: v_icmp_i32_sgt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_gt_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_sgt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_gt_i32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_sge:
-; GCN: v_cmp_ge_i32_e64
 define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_sge:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_ge_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_sge:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ge_i32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i32_slt:
-; GCN: v_cmp_lt_i32_e64
 define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_slt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_lt_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_slt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_lt_i32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
-; GCN-LABEL: {{^}}v_icmp_i32_sle:
-; GCN: v_cmp_le_i32_e64
+
 define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) {
+; GFX-LABEL: v_icmp_i32_sle:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_le_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i32_sle:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_le_i32_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i64_eq:
-; GCN: v_cmp_eq_u64_e64
 define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_i64_eq:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i64_eq:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i64_ne:
-; GCN: v_cmp_ne_u64_e64
 define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_i64_ne:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i64_ne:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ne_u64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_u64_ugt:
-; GCN: v_cmp_gt_u64_e64
 define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_u64_ugt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_u64_ugt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_gt_u64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_u64_uge:
-; GCN: v_cmp_ge_u64_e64
 define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_u64_uge:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_u64_uge:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ge_u64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_u64_ult:
-; GCN: v_cmp_lt_u64_e64
 define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_u64_ult:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_u64_ult:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_u64_ule:
-; GCN: v_cmp_le_u64_e64
 define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_u64_ule:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_u64_ule:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_le_u64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i64_sgt:
-; GCN: v_cmp_gt_i64_e64
 define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_i64_sgt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i64_sgt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_gt_i64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i64_sge:
-; GCN: v_cmp_ge_i64_e64
 define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_i64_sge:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i64_sge:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ge_i64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i64_slt:
-; GCN: v_cmp_lt_i64_e64
 define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_i64_slt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i64_slt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_lt_i64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
-; GCN-LABEL: {{^}}v_icmp_i64_sle:
-; GCN: v_cmp_le_i64_e64
+
 define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) {
+; GFX-LABEL: v_icmp_i64_sle:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    v_mov_b32_e32 v1, 0
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i64_sle:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    v_mov_b32_e32 v1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_le_i64_e64 s[2:3], s[2:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; VI: v_cmp_eq_u16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
-; SI: v_cmp_eq_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_eq:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX-NEXT:    v_cmp_eq_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_eq:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_eq_u16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16:
-; GCN-NOT: v_cmp_eq_
 define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) {
+; GCN-LABEL: v_icmp_i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
-; GCN-LABEL: {{^}}v_icmp_i16_ne:
-; VI: v_cmp_ne_u16_e64
 
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
-; SI: v_cmp_ne_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_ne:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX-NEXT:    v_cmp_ne_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_ne:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ne_u16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_ugt:
-; VI: v_cmp_gt_u16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
-; SI: v_cmp_gt_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_ugt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX-NEXT:    v_cmp_gt_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_ugt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_gt_u16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_uge:
-; VI: v_cmp_ge_u16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
-; SI: v_cmp_ge_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_uge:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX-NEXT:    v_cmp_ge_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_uge:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ge_u16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_ult:
-; VI: v_cmp_lt_u16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
-; SI: v_cmp_lt_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_ult:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX-NEXT:    v_cmp_lt_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_ult:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_lt_u16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_ule:
-; VI: v_cmp_le_u16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_and_b32 [[CVT:s[0-9]+]], s{{[0-9]+}}, 0xffff{{$}}
-; SI: v_cmp_le_u32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_ule:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX-NEXT:    v_cmp_le_u32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_ule:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_le_u16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_sgt:
-; VI: v_cmp_gt_i16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
-; SI: v_cmp_gt_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 {
+; GFX-LABEL: v_icmp_i16_sgt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_sext_i32_i16 s2, s2
+; GFX-NEXT:    v_cmp_gt_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_sgt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_gt_i16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_sge:
-; VI: v_cmp_ge_i16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
-; SI: v_cmp_ge_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_sge:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_sext_i32_i16 s2, s2
+; GFX-NEXT:    v_cmp_ge_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_sge:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_ge_i16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i16_slt:
-; VI: v_cmp_lt_i16_e64
-
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
-; SI: v_cmp_lt_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_slt:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_sext_i32_i16 s2, s2
+; GFX-NEXT:    v_cmp_lt_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_slt:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_lt_i16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
-; GCN-LABEL: {{^}}v_icmp_i16_sle:
-; VI: v_cmp_le_i16_e64
 
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x64
-; SI-DAG: s_sext_i32_i16 [[CVT:s[0-9]+]], s{{[0-9]+}}
-; SI: v_cmp_le_i32_e64 s{{\[[0-9]+:[0-9]+\]}}, [[CVT]], [[K]]
 define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
+; GFX-LABEL: v_icmp_i16_sle:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dword s2, s[0:1], 0xb
+; GFX-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s3, 0xf000
+; GFX-NEXT:    v_mov_b32_e32 v0, 0x64
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_sext_i32_i16 s2, s2
+; GFX-NEXT:    v_cmp_le_i32_e64 s[4:5], s2, v0
+; GFX-NEXT:    s_mov_b32 s2, -1
+; GFX-NEXT:    v_mov_b32_e32 v0, s4
+; GFX-NEXT:    v_mov_b32_e32 v1, s5
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i16_sle:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    v_mov_b32_e32 v0, 0x64
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_cmp_le_i16_e64 s[2:3], s2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41)
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; GCN-LABEL: {{^}}v_icmp_i1_ne0:
-; GCN: s_cmp_gt_u32
-; GCN: s_cselect_b64 s[[C0:\[[0-9]+:[0-9]+\]]], -1, 0
-; GCN: s_cmp_gt_u32
-; GCN: s_cselect_b64 s[[C1:\[[0-9]+:[0-9]+\]]], -1, 0
-; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
-; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
-; SI-NEXT: s_mov_b32
-; SI-NEXT: s_mov_b32
-; GCN-NEXT: v_mov_b32_e32
-; GCN-NEXT: v_mov_b32_e32
-; GCN: {{global|flat|buffer}}_store_dwordx2
 define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+; GFX-LABEL: v_icmp_i1_ne0:
+; GFX:       ; %bb.0:
+; GFX-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX-NEXT:    s_mov_b32 s7, 0xf000
+; GFX-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX-NEXT:    s_cmp_gt_u32 s2, 1
+; GFX-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; GFX-NEXT:    s_cmp_gt_u32 s3, 2
+; GFX-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; GFX-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; GFX-NEXT:    s_mov_b32 s6, -1
+; GFX-NEXT:    s_mov_b32 s4, s0
+; GFX-NEXT:    s_mov_b32 s5, s1
+; GFX-NEXT:    v_mov_b32_e32 v0, s2
+; GFX-NEXT:    v_mov_b32_e32 v1, s3
+; GFX-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX-NEXT:    s_endpgm
+;
+; VI-LABEL: v_icmp_i1_ne0:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_cmp_gt_u32 s2, 1
+; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
+; VI-NEXT:    s_cmp_gt_u32 s3, 2
+; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT:    s_endpgm
   %c0 = icmp ugt i32 %a, 1
   %c1 = icmp ugt i32 %b, 2
   %src = and i1 %c0, %c1
@@ -318,4 +1089,13 @@ define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b)
   ret void
 }
 
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) {
+; GCN-LABEL: test_intr_icmp_i32_invalid_cc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone convergent }


        


More information about the llvm-commits mailing list