[llvm] AMDGPU/GlobalISel: Restore disabled test (PR #129001)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 21:09:48 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Patch is 74.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129001.diff
1 File Affected:
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll (+421-466)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
index 53f6c9543c3e3..074272f7bed86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
@@ -5,7 +5,6 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
-; REQUIRES: do-not-run-me
define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
; GFX7-LABEL: v_div_fmas_f32:
@@ -291,14 +290,14 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double
define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
; GFX7-LABEL: test_div_fmas_f32:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa
-; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13
-; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c
-; GFX7-NEXT: s_load_dword s7, s[2:3], 0x25
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0xa
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0x13
+; GFX7-NEXT: s_load_dword s6, s[4:5], 0x1c
+; GFX7-NEXT: s_load_dword s7, s[4:5], 0x25
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: v_mov_b32_e32 v2, s6
; GFX7-NEXT: s_and_b32 s2, 1, s7
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
@@ -311,17 +310,17 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
;
; GFX8-LABEL: test_div_fmas_f32:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28
-; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c
-; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70
-; GFX8-NEXT: s_load_dword s5, s[2:3], 0x94
+; GFX8-NEXT: s_load_dword s0, s[4:5], 0x28
+; GFX8-NEXT: s_load_dword s1, s[4:5], 0x4c
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x70
+; GFX8-NEXT: s_load_dword s3, s[4:5], 0x94
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: s_and_b32 s0, 1, s5
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-NEXT: s_and_b32 s0, 1, s3
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
-; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-NEXT: s_nop 2
; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -333,14 +332,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
; GFX10_W32-LABEL: test_div_fmas_f32:
; GFX10_W32: ; %bb.0:
; GFX10_W32-NEXT: s_clause 0x4
-; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94
-; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c
-; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x70
-; GFX10_W32-NEXT: s_load_dword s7, s[2:3], 0x28
-; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94
+; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x4c
+; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x70
+; GFX10_W32-NEXT: s_load_dword s7, s[4:5], 0x28
+; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10_W32-NEXT: s_and_b32 s2, 1, s4
-; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
+; GFX10_W32-NEXT: s_and_b32 s2, 1, s2
+; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3
; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6
; GFX10_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1
@@ -351,14 +350,14 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
; GFX10_W64-LABEL: test_div_fmas_f32:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_clause 0x4
-; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94
-; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c
-; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x70
-; GFX10_W64-NEXT: s_load_dword s7, s[2:3], 0x28
-; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94
+; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x4c
+; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x70
+; GFX10_W64-NEXT: s_load_dword s7, s[4:5], 0x28
+; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10_W64-NEXT: s_and_b32 s2, 1, s4
-; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
+; GFX10_W64-NEXT: s_and_b32 s2, 1, s2
+; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3
; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6
; GFX10_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1
@@ -369,40 +368,36 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
; GFX11_W32-LABEL: test_div_fmas_f32:
; GFX11_W32: ; %bb.0:
; GFX11_W32-NEXT: s_clause 0x4
-; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94
-; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c
-; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x70
-; GFX11_W32-NEXT: s_load_b32 s7, s[2:3], 0x28
-; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94
+; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x4c
+; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x70
+; GFX11_W32-NEXT: s_load_b32 s7, s[4:5], 0x28
+; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11_W32-NEXT: s_and_b32 s2, 1, s4
-; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
+; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
+; GFX11_W32-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s6
; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX11_W32-NEXT: s_nop 0
-; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-LABEL: test_div_fmas_f32:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_clause 0x4
-; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94
-; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c
-; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x70
-; GFX11_W64-NEXT: s_load_b32 s7, s[2:3], 0x28
-; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94
+; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x4c
+; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x70
+; GFX11_W64-NEXT: s_load_b32 s7, s[4:5], 0x28
+; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11_W64-NEXT: s_and_b32 s2, 1, s4
-; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5
+; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
+; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6
; GFX11_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
store float %result, ptr addrspace(1) %out, align 4
@@ -412,33 +407,33 @@ define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], f
define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
; GFX7-LABEL: test_div_fmas_f32_inline_imm_0:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_load_dword s4, s[2:3], 0x13
-; GFX7-NEXT: s_load_dword s5, s[2:3], 0x1c
-; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0x13
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0x1c
+; GFX7-NEXT: s_load_dword s6, s[4:5], 0x25
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: s_and_b32 s2, 1, s6
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX7-NEXT: s_mov_b32 s2, -1
-; GFX7-NEXT: s_nop 2
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_nop 1
; GFX7-NEXT: v_div_fmas_f32 v0, 1.0, v0, v1
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_fmas_f32_inline_imm_0:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_load_dword s0, s[2:3], 0x4c
-; GFX8-NEXT: s_load_dword s1, s[2:3], 0x70
-; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94
+; GFX8-NEXT: s_load_dword s0, s[4:5], 0x4c
+; GFX8-NEXT: s_load_dword s1, s[4:5], 0x70
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x94
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: s_and_b32 s0, 1, s4
+; GFX8-NEXT: s_and_b32 s0, 1, s2
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
-; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-NEXT: s_nop 2
; GFX8-NEXT: v_div_fmas_f32 v2, 1.0, v0, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -450,14 +445,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0:
; GFX10_W32: ; %bb.0:
; GFX10_W32-NEXT: s_clause 0x3
-; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94
-; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70
-; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x4c
-; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x94
+; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x70
+; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x4c
+; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10_W32-NEXT: s_and_b32 s2, 1, s4
-; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
+; GFX10_W32-NEXT: s_and_b32 s2, 1, s2
+; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3
; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX10_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1]
@@ -466,14 +461,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_clause 0x3
-; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94
-; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70
-; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x4c
-; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x94
+; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x70
+; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x4c
+; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10_W64-NEXT: s_and_b32 s2, 1, s4
-; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
+; GFX10_W64-NEXT: s_and_b32 s2, 1, s2
+; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3
; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX10_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1]
@@ -482,37 +477,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0:
; GFX11_W32: ; %bb.0:
; GFX11_W32-NEXT: s_clause 0x3
-; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94
-; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70
-; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x4c
-; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x94
+; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x70
+; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x4c
+; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11_W32-NEXT: s_and_b32 s2, 1, s4
-; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5
+; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
+; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX11_W32-NEXT: s_nop 0
-; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_clause 0x3
-; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94
-; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70
-; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x4c
-; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x94
+; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x70
+; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x4c
+; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11_W64-NEXT: s_and_b32 s2, 1, s4
-; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5
+; GFX11_W64-NEXT: s_and_b32 s2, 1, s2
+; GFX11_W64-NEXT: v_mov_b32_e32 v0, s3
; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0
; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
%result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d)
store float %result, ptr addrspace(1) %out, align 4
@@ -522,33 +513,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out,
define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) {
; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
; GFX7: ; %bb.0:
-; GFX7-NEXT: s_load_dword s4, s[2:3], 0x2
-; GFX7-NEXT: s_load_dword s5, s[2:3], 0x4
-; GFX7-NEXT: s_load_dword s6, s[2:3], 0xd
-; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_load_dword s2, s[4:5], 0x2
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0x4
+; GFX7-NEXT: s_load_dword s6, s[4:5], 0xd
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, s4
-; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: s_and_b32 s2, 1, s6
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX7-NEXT: s_mov_b32 s2, -1
-; GFX7-NEXT: s_nop 2
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_nop 1
; GFX7-NEXT: v_div_fmas_f32 v0, v0, 1.0, v1
; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_load_dword s0, s[2:3], 0x8
-; GFX8-NEXT: s_load_dword s1, s[2:3], 0x10
-; GFX8-NEXT: s_load_dword s4, s[2:3], 0x34
+; GFX8-NEXT: s_load_dword s0, s[4:5], 0x8
+; GFX8-NEXT: s_load_dword s1, s[4:5], 0x10
+; GFX8-NEXT: s_load_dword s2, s[4:5], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
-; GFX8-NEXT: s_and_b32 s0, 1, s4
+; GFX8-NEXT: s_and_b32 s0, 1, s2
; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
-; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-NEXT: s_nop 2
; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -560,14 +551,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
; GFX10_W32: ; %bb.0:
; GFX10_W32-NEXT: s_clause 0x3
-; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x34
-; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x10
-; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x8
-; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W32-NEXT: s_load_dword s2, s[4:5], 0x34
+; GFX10_W32-NEXT: s_load_dword s3, s[4:5], 0x10
+; GFX10_W32-NEXT: s_load_dword s6, s[4:5], 0x8
+; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10_W32-NEXT: s_and_b32 s2, 1, s4
-; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5
+; GFX10_W32-NEXT: s_and_b32 s2, 1, s2
+; GFX10_W32-NEXT: v_mov_b32_e32 v0, s3
; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1]
@@ -576,14 +567,14 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_clause 0x3
-; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x34
-; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x10
-; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x8
-; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GFX10_W64-NEXT: s_load_dword s2, s[4:5], 0x34
+; GFX10_W64-NEXT: s_load_dword s3, s[4:5], 0x10
+; GFX10_W64-NEXT: s_load_dword s6, s[4:5], 0x8
+; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10_W64-NEXT: s_and_b32 s2, 1, s4
-; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5
+; GFX10_W64-NEXT: s_and_b32 s2, 1, s2
+; GFX10_W64-NEXT: v_mov_b32_e32 v0, s3
; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1]
@@ -592,37 +583,33 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out,
; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1:
; GFX11_W32: ; %bb.0:
; GFX11_W32-NEXT: s_clause 0x3
-; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x34
-; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x10
-; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x8
-; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W32-NEXT: s_load_b32 s2, s[4:5], 0x34
+; GFX11_W32-NEXT: s_load_b32 s3, s[4:5], 0x10
+; GFX11_W32-NEXT: s_load_b32 s6, s[4:5], 0x8
+; GFX11_W32-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11_W32-NEXT: s_and_b32 s2, 1, s4
-; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5
+; GFX11_W32-NEXT: s_and_b32 s2, 1, s2
+; GFX11_W32-NEXT: v_mov_b32_e32 v0, s3
; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2
; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0
; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX11_W32-NEXT: s_nop 0
-; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W32-NEXT: s_endpgm
;
; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_clause 0x3
-; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x34
-; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x10
-; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x8
-; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
+; GFX11_W64-NEXT: s_load_b32 s2, s[4:5], 0x34
+; GFX11_W64-NEXT: s_load_b32 s3, s[4:5], 0x10
+; GFX11_W64-NEXT: s_load_b32 s6, s[4:5], 0x8
+; GFX11_W64-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11_W64-NEXT: s_and_b32 s2, 1, s4
-; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5
+; GFX11_W64-NEXT: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/129001
More information about the llvm-commits mailing list