[llvm] [NFC][AMDGPU] Regenerate CHECK lines in commute-compares.ll. (PR #140076)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 08:07:43 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Harrison Hao (harrisonGPU)
<details>
<summary>Changes</summary>
---
The patch is 61.04 KiB and has been truncated to 20.00 KiB below. Full version: https://github.com/llvm/llvm-project/pull/140076.diff
1 File Affected:
- (modified) llvm/test/CodeGen/AMDGPU/commute-compares.ll (+865-114)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index fcb871cedd0cb..ae8080cf9f06a 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
@@ -6,9 +7,23 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; i32 compares
; --------------------------------------------------------------------------------
-; GCN-LABEL: {{^}}commute_eq_64_i32:
-; GCN: v_cmp_eq_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_eq_64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -19,9 +34,23 @@ define amdgpu_kernel void @commute_eq_64_i32(ptr addrspace(1) %out, ptr addrspac
ret void
}
-; GCN-LABEL: {{^}}commute_ne_64_i32:
-; GCN: v_cmp_ne_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ne_64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -33,10 +62,24 @@ define amdgpu_kernel void @commute_ne_64_i32(ptr addrspace(1) %out, ptr addrspac
}
; FIXME: Why isn't this being folded as a constant?
-; GCN-LABEL: {{^}}commute_ne_litk_i32:
-; GCN: s_movk_i32 [[K:s[0-9]+]], 0x3039
-; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ne_litk_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_movk_i32 s4, 0x3039
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s4, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -47,9 +90,23 @@ define amdgpu_kernel void @commute_ne_litk_i32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; GCN-LABEL: {{^}}commute_ugt_64_i32:
-; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ugt_64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -60,9 +117,23 @@ define amdgpu_kernel void @commute_ugt_64_i32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_uge_64_i32:
-; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_uge_64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_lt_u32_e32 vcc, 63, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -73,9 +144,23 @@ define amdgpu_kernel void @commute_uge_64_i32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_ult_64_i32:
-; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ult_64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -86,9 +171,23 @@ define amdgpu_kernel void @commute_ult_64_i32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_ule_63_i32:
-; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ule_63_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -99,10 +198,24 @@ define amdgpu_kernel void @commute_ule_63_i32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_ule_64_i32:
-; GCN: s_movk_i32 [[K:s[0-9]+]], 0x41{{$}}
-; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ule_64_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_movk_i32 s4, 0x41
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_gt_u32_e32 vcc, s4, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -113,9 +226,23 @@ define amdgpu_kernel void @commute_ule_64_i32(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
-; GCN: v_ashrrev_i32_e32 v2, 31, v2
define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_sgt_neg1_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_not_b32_e32 v2, v2
+; GCN-NEXT: v_ashrrev_i32_e32 v2, 31, v2
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -126,9 +253,23 @@ define amdgpu_kernel void @commute_sgt_neg1_i32(ptr addrspace(1) %out, ptr addrs
ret void
}
-; GCN-LABEL: {{^}}commute_sge_neg2_i32:
-; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_sge_neg2_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_lt_i32_e32 vcc, -3, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -139,9 +280,23 @@ define amdgpu_kernel void @commute_sge_neg2_i32(ptr addrspace(1) %out, ptr addrs
ret void
}
-; GCN-LABEL: {{^}}commute_slt_neg16_i32:
-; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_slt_neg16_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_gt_i32_e32 vcc, -16, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -152,9 +307,23 @@ define amdgpu_kernel void @commute_slt_neg16_i32(ptr addrspace(1) %out, ptr addr
ret void
}
-; GCN-LABEL: {{^}}commute_sle_5_i32:
-; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_sle_5_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 6, v2
+; GCN-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -169,9 +338,24 @@ define amdgpu_kernel void @commute_sle_5_i32(ptr addrspace(1) %out, ptr addrspac
; i64 compares
; --------------------------------------------------------------------------------
-; GCN-LABEL: {{^}}commute_eq_64_i64:
-; GCN: v_cmp_eq_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_eq_64_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -182,9 +366,24 @@ define amdgpu_kernel void @commute_eq_64_i64(ptr addrspace(1) %out, ptr addrspac
ret void
}
-; GCN-LABEL: {{^}}commute_ne_64_i64:
-; GCN: v_cmp_ne_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ne_64_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -195,9 +394,24 @@ define amdgpu_kernel void @commute_ne_64_i64(ptr addrspace(1) %out, ptr addrspac
ret void
}
-; GCN-LABEL: {{^}}commute_ugt_64_i64:
-; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ugt_64_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -208,9 +422,24 @@ define amdgpu_kernel void @commute_ugt_64_i64(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_uge_64_i64:
-; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_uge_64_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
@@ -221,9 +450,24 @@ define amdgpu_kernel void @commute_uge_64_i64(ptr addrspace(1) %out, ptr addrspa
ret void
}
-; GCN-LABEL: {{^}}commute_ult_64_i64:
-; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @commute_ult_64_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
+; GCN-LABEL: commute_ult_64_i64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GCN-NEXT: buffer_load_dwordx2 v[3:4], v[1:2], s[4:7], 0 addr64
+; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
+; GCN-NEXT: s_mov_b64 s[2:3], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[3:4]
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
+; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%gep.out = getelementptr i32, ptr addrspace(1) %...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/140076
More information about the llvm-commits mailing list