[llvm] [NFC][AMDGPU] Update tests to use autogened CHECKs (PR #140311)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 16 14:31:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Chinmay Deshpande (chinmaydd)
<details>
<summary>Changes</summary>
This NFC change regenerates the CHECK lines of the affected AMDGPU tests with utils/update_llc_test_checks.py, in preparation for enabling the CopyConstrain DAG mutation in the default maxOccupancy scheduler.
Change-Id: I4c6928197c7cfc8d1d51db90d111c3ffc8e09e32
---
Patch is 626.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140311.diff
9 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll (+286-44)
- (modified) llvm/test/CodeGen/AMDGPU/dagcombine-select.ll (+331-110)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll (+460-200)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll (+4880-424)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll (+2750-286)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll (+805-250)
- (modified) llvm/test/CodeGen/AMDGPU/load-global-f32.ll (+702-92)
- (modified) llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll (+402-128)
- (modified) llvm/test/CodeGen/AMDGPU/trunc.ll (+422-50)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
index 2c2855c860ebb..81a646f8b7055 100644
--- a/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FUNC,GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,FUNC,GFX8 %s
@@ -10,21 +11,70 @@
; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
; instructions with B64, U64, and I64 take 64-bit operands.
-; FUNC-LABEL: {{^}}local_address_load:
-; SI: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
-; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define amdgpu_kernel void @local_address_load(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_load:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_load:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = load i32, ptr addrspace(3) %in
store i32 %0, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep:
-; SI: s_add_i32 [[SPTR:s[0-9]]]
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_read_b32 [[VPTR]]
define amdgpu_kernel void @local_address_gep(ptr addrspace(1) %out, ptr addrspace(3) %in, i32 %offset) {
+; GFX7-LABEL: local_address_gep:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_lshl_b32 s3, s3, 2
+; GFX7-NEXT: s_add_i32 s2, s2, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_lshl_b32 s3, s3, 2
+; GFX8-NEXT: s_add_i32 s2, s2, s3
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = getelementptr i32, ptr addrspace(3) %in, i32 %offset
%1 = load i32, ptr addrspace(3) %0
@@ -32,10 +82,34 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep_const_offset:
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
define amdgpu_kernel void @local_address_gep_const_offset(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_gep_const_offset:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0 offset:4
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_const_offset:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0 offset:4
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = getelementptr i32, ptr addrspace(3) %in, i32 1
%1 = load i32, ptr addrspace(3) %0
@@ -44,11 +118,36 @@ entry:
}
; Offset too large, can't fold into 16-bit immediate offset.
-; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
-; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_read_b32 [[VPTR]]
define amdgpu_kernel void @local_address_gep_large_const_offset(ptr addrspace(1) %out, ptr addrspace(3) %in) {
+; GFX7-LABEL: local_address_gep_large_const_offset:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s2, s2, 0x10004
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_large_const_offset:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s2, s2, 0x10004
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = getelementptr i32, ptr addrspace(3) %in, i32 16385
%1 = load i32, ptr addrspace(3) %0
@@ -56,24 +155,71 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
-; GFX7 v_cmp_ne_u32
-; GFX7: s_cselect_b32
-; GFX8: s_cmp_lg_u32
-; GFX8-NOT: v_cmp_ne_u32
-; GFX8: s_cselect_b32
define amdgpu_kernel void @null_32bit_lds_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds) nounwind {
+; GFX7-LABEL: null_32bit_lds_ptr:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dword s6, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_movk_i32 s4, 0x7b
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_cmp_lg_u32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s4, s4, 0x1c8
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: null_32bit_lds_ptr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_movk_i32 s4, 0x7b
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_cmp_lg_u32 s6, 0
+; GFX8-NOT: v_cmp_ne_u32
+; GFX8-NEXT: s_cselect_b32 s4, s4, 0x1c8
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
%cmp = icmp ne ptr addrspace(3) %lds, null
%x = select i1 %cmp, i32 123, i32 456
store i32 %x, ptr addrspace(1) %out
ret void
}
-; FUNC-LABEL: {{^}}mul_32bit_ptr:
-; SI: s_mul_i32
-; SI-NEXT: s_add_i32
-; SI: ds_read_b32
define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3) %lds, i32 %tid) {
+; GFX7-LABEL: mul_32bit_ptr:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_mul_i32 s3, s3, 12
+; GFX7-NEXT: s_add_i32 s2, s2, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: mul_32bit_ptr:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_mul_i32 s3, s3, 12
+; GFX8-NEXT: s_add_i32 s2, s2, s3
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
%ptr = getelementptr [3 x float], ptr addrspace(3) %lds, i32 %tid, i32 0
%val = load float, ptr addrspace(3) %ptr
store float %val, ptr addrspace(1) %out
@@ -82,60 +228,156 @@ define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3)
@g_lds = addrspace(3) global float poison, align 4
-; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
-; SI: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
-; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define amdgpu_kernel void @infer_ptr_alignment_global_offset(ptr addrspace(1) %out, i32 %tid) {
+; GFX7-LABEL: infer_ptr_alignment_global_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: ds_read_b32 v0, v0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: infer_ptr_alignment_global_offset:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_mov_b32_e32 v0, 0
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: ds_read_b32 v0, v0
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
%val = load float, ptr addrspace(3) @g_lds
store float %val, ptr addrspace(1) %out
ret void
}
-
@ptr = addrspace(3) global ptr addrspace(3) poison
@dst = addrspace(3) global [16383 x i32] poison
-; FUNC-LABEL: {{^}}global_ptr:
-; SI: ds_write_b32
define amdgpu_kernel void @global_ptr() nounwind {
+; SI-LABEL: global_ptr:
+; SI: ; %bb.0:
+; SI-NEXT: v_mov_b32_e32 v0, 64
+; SI-NEXT: v_mov_b32_e32 v1, 0
+; SI-NEXT: s_mov_b32 m0, -1
+; SI-NEXT: ds_write_b32 v1, v0 offset:65532
+; SI-NEXT: s_endpgm
store ptr addrspace(3) getelementptr ([16383 x i32], ptr addrspace(3) @dst, i32 0, i32 16), ptr addrspace(3) @ptr
ret void
}
-; FUNC-LABEL: {{^}}local_address_store:
-; SI: ds_write_b32
define amdgpu_kernel void @local_address_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b32 v0, v1
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: ds_write_b32 v0, v1
+; GFX8-NEXT: s_endpgm
store i32 %val, ptr addrspace(3) %out
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep_store:
-; SI: s_add_i32 [[SADDR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
-; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
define amdgpu_kernel void @local_address_gep_store(ptr addrspace(3) %out, i32, i32 %val, i32 %offset) {
+; GFX7-LABEL: local_address_gep_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_lshl_b32 s1, s1, 2
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: s_add_i32 s0, s2, s1
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b32 v1, v0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_lshl_b32 s1, s1, 2
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: s_add_i32 s0, s2, s1
+; GFX8-NEXT: v_mov_b32_e32 v1, s0
+; GFX8-NEXT: ds_write_b32 v1, v0
+; GFX8-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %out, i32 %offset
store i32 %val, ptr addrspace(3) %gep, align 4
ret void
}
-; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
define amdgpu_kernel void @local_address_gep_const_offset_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_gep_const_offset_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_mov_b32_e32 v0, s0
+; GFX7-NEXT: v_mov_b32_e32 v1, s1
+; GFX7-NEXT: ds_write_b32 v0, v1 offset:4
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_const_offset_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: ds_write_b32 v0, v1 offset:4
+; GFX8-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %out, i32 1
store i32 %val, ptr addrspace(3) %gep, align 4
ret void
}
; Offset too large, can't fold into 16-bit immediate offset.
-; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
-; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
define amdgpu_kernel void @local_address_gep_large_const_offset_store(ptr addrspace(3) %out, i32 %val) {
+; GFX7-LABEL: local_address_gep_large_const_offset_store:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s0, s0, 0x10004
+; GFX7-NEXT: v_mov_b32_e32 v0, s1
+; GFX7-NEXT: v_mov_b32_e32 v1, s0
+; GFX7-NEXT: ds_write_b32 v1, v0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: local_address_gep_large_const_offset_store:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s0, s0, 0x10004
+; GFX8-NEXT: v_mov_b32_e32 v0, s1
+; GFX8-NEXT: v_mov_b32_e32 v1, s0
+; GFX8-NEXT: ds_write_b32 v1, v0
+; GFX8-NEXT: s_endpgm
%gep = getelementptr i32, ptr addrspace(3) %out, i32 16385
store i32 %val, ptr addrspace(3) %gep, align 4
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FUNC: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index f4d8ec180cf91..f27ec1e38f942 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,11 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; GCN-LABEL: {{^}}select_and1:
-; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_gt_i32 s2, 10
+; GCN-NEXT: s_cselect_b32 s2, s3, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NOT: v_and_b32
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %y, %s
@@ -13,12 +20,18 @@ define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
ret void
}
-; GCN-LABEL: {{^}}select_and2:
-; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_gt_i32 s2, 10
+; GCN-NEXT: s_cselect_b32 s2, s3, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NOT: v_and_b32
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 0, i32 -1
%a = and i32 %s, %y
@@ -26,12 +39,18 @@ define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
ret void
}
-; GCN-LABEL: {{^}}select_and3:
-; GCN: s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
-; GCN: v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
-; GCN-NOT: v_and_b32
-; GCN: store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
+; GCN-LABEL: select_and3:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_lt_i32 s2, 11
+; GCN-NEXT: s_cselect_b32 s2, s3, 0
+; GCN-NEXT: v_mov_b32_e32 v1, s2
+; GCN-NOT: v_and_b32
+; GCN-NEXT: global_store_dword v0, v1, s[0:1]
+; GCN-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, i32 -1, i32 0
%a = and i32 %y, %s
@@ -39,18 +58,26 @@ define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
ret void
}
-; GCN-LABEL: {{^}}select_and_v4:
-; GCN: s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, 0
-; GCN: v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
-; GCN: v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
-; GCN: v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
-; GCN: v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
-; GCN-NOT: v_and_b32
-; GCN: global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
+; GCN-LABEL: select_and_v4:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GCN-NEXT: v_mov_b32_e32 v4, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_gt_i32 s8, 10
+; GCN-NEXT: s_cselect_b32 s3, s3, 0
+; GCN-NEXT: s_cselect_b32 s2, s2, 0
+; GCN-NEXT: s_cselect_b32 s1, s1, 0
+; GCN-NEXT: s_csel...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/140311
More information about the llvm-commits mailing list