[llvm] 40e1510 - [AMDGPU][NFC] Enable gfx942 for more tests (#154363)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 07:46:30 PDT 2025
Author: Janek van Oirschot
Date: 2025-08-20T15:46:26+01:00
New Revision: 40e1510146da2c4fa06cdd1901ea19173d8a638c
URL: https://github.com/llvm/llvm-project/commit/40e1510146da2c4fa06cdd1901ea19173d8a638c
DIFF: https://github.com/llvm/llvm-project/commit/40e1510146da2c4fa06cdd1901ea19173d8a638c.diff
LOG: [AMDGPU][NFC] Enable gfx942 for more tests (#154363)
Enable gfx942 for tests that are affected by an AMDGPU bitcast
constant combine (#154115).
More tests are expected to be affected in the aforementioned PR once it
is rebased on top of this PR.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
llvm/test/CodeGen/AMDGPU/imm.ll
llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index 85180a2dc6348..c429b1a32bde6 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX942 %s
define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
; GCN-LABEL: select_and1:
@@ -56,24 +57,43 @@ define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
}
define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
-; GCN-LABEL: select_and_v4:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s8, s[4:5], 0x2c
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
-; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v4, 0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_gt_i32 s8, 10
-; GCN-NEXT: s_cselect_b32 s3, s3, 0
-; GCN-NEXT: s_cselect_b32 s2, s2, 0
-; GCN-NEXT: s_cselect_b32 s1, s1, 0
-; GCN-NEXT: s_cselect_b32 s0, s0, 0
-; GCN-NEXT: v_mov_b32_e32 v0, s0
-; GCN-NEXT: v_mov_b32_e32 v1, s1
-; GCN-NEXT: v_mov_b32_e32 v2, s2
-; GCN-NEXT: v_mov_b32_e32 v3, s3
-; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: select_and_v4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_cmp_gt_i32 s8, 10
+; GFX9-NEXT: s_cselect_b32 s3, s3, 0
+; GFX9-NEXT: s_cselect_b32 s2, s2, 0
+; GFX9-NEXT: s_cselect_b32 s1, s1, 0
+; GFX9-NEXT: s_cselect_b32 s0, s0, 0
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX9-NEXT: s_endpgm
+;
+; GFX942-LABEL: select_and_v4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_gt_i32 s8, 10
+; GFX942-NEXT: s_cselect_b32 s3, s3, 0
+; GFX942-NEXT: s_cselect_b32 s2, s2, 0
+; GFX942-NEXT: s_cselect_b32 s1, s1, 0
+; GFX942-NEXT: s_cselect_b32 s0, s0, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, s0
+; GFX942-NEXT: v_mov_b32_e32 v3, s1
+; GFX942-NEXT: v_mov_b32_e32 v4, s2
+; GFX942-NEXT: v_mov_b32_e32 v5, s3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7]
+; GFX942-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
%a = and <4 x i32> %s, %y
@@ -136,24 +156,43 @@ define amdgpu_kernel void @select_or3(ptr addrspace(1) %p, i32 %x, i32 %y) {
}
define amdgpu_kernel void @select_or_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
-; GCN-LABEL: select_or_v4:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s8, s[4:5], 0x2c
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
-; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v4, 0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_lt_i32 s8, 11
-; GCN-NEXT: s_cselect_b32 s3, s3, -1
-; GCN-NEXT: s_cselect_b32 s2, s2, -1
-; GCN-NEXT: s_cselect_b32 s1, s1, -1
-; GCN-NEXT: s_cselect_b32 s0, s0, -1
-; GCN-NEXT: v_mov_b32_e32 v0, s0
-; GCN-NEXT: v_mov_b32_e32 v1, s1
-; GCN-NEXT: v_mov_b32_e32 v2, s2
-; GCN-NEXT: v_mov_b32_e32 v3, s3
-; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: select_or_v4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_cmp_lt_i32 s8, 11
+; GFX9-NEXT: s_cselect_b32 s3, s3, -1
+; GFX9-NEXT: s_cselect_b32 s2, s2, -1
+; GFX9-NEXT: s_cselect_b32 s1, s1, -1
+; GFX9-NEXT: s_cselect_b32 s0, s0, -1
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: v_mov_b32_e32 v2, s2
+; GFX9-NEXT: v_mov_b32_e32 v3, s3
+; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; GFX9-NEXT: s_endpgm
+;
+; GFX942-LABEL: select_or_v4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_lt_i32 s8, 11
+; GFX942-NEXT: s_cselect_b32 s3, s3, -1
+; GFX942-NEXT: s_cselect_b32 s2, s2, -1
+; GFX942-NEXT: s_cselect_b32 s1, s1, -1
+; GFX942-NEXT: s_cselect_b32 s0, s0, -1
+; GFX942-NEXT: v_mov_b32_e32 v2, s0
+; GFX942-NEXT: v_mov_b32_e32 v3, s1
+; GFX942-NEXT: v_mov_b32_e32 v4, s2
+; GFX942-NEXT: v_mov_b32_e32 v5, s3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[6:7]
+; GFX942-NEXT: s_endpgm
%c = icmp slt i32 %x, 11
%s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
%a = or <4 x i32> %s, %y
@@ -236,23 +275,41 @@ define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(ptr ad
}
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(ptr addrspace(1) %p, i1 %cond) {
-; GCN-LABEL: sel_constants_sub_constant_sel_constants_v4i32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v4, 0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_bitcmp1_b32 s2, 0
-; GCN-NEXT: s_cselect_b32 s2, 7, 14
-; GCN-NEXT: s_cselect_b32 s3, 6, 10
-; GCN-NEXT: s_cselect_b32 s4, 5, 6
-; GCN-NEXT: s_cselect_b32 s5, 9, 2
-; GCN-NEXT: v_mov_b32_e32 v0, s5
-; GCN-NEXT: v_mov_b32_e32 v1, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s3
-; GCN-NEXT: v_mov_b32_e32 v3, s2
-; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: sel_constants_sub_constant_sel_constants_v4i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_bitcmp1_b32 s2, 0
+; GFX9-NEXT: s_cselect_b32 s2, 7, 14
+; GFX9-NEXT: s_cselect_b32 s3, 6, 10
+; GFX9-NEXT: s_cselect_b32 s4, 5, 6
+; GFX9-NEXT: s_cselect_b32 s5, 9, 2
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v2, s3
+; GFX9-NEXT: v_mov_b32_e32 v3, s2
+; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX942-LABEL: sel_constants_sub_constant_sel_constants_v4i32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_bitcmp1_b32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 7, 14
+; GFX942-NEXT: s_cselect_b32 s3, 6, 10
+; GFX942-NEXT: s_cselect_b32 s4, 5, 6
+; GFX942-NEXT: s_cselect_b32 s5, 9, 2
+; GFX942-NEXT: v_mov_b32_e32 v2, s5
+; GFX942-NEXT: v_mov_b32_e32 v3, s4
+; GFX942-NEXT: v_mov_b32_e32 v4, s3
+; GFX942-NEXT: v_mov_b32_e32 v5, s2
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-NEXT: s_endpgm
%sel = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
%bo = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %sel
store <4 x i32> %bo, ptr addrspace(1) %p, align 32
@@ -461,24 +518,43 @@ define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(ptr addrspace(1) %p
}
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(ptr addrspace(1) %p, i1 %cond) {
-; GCN-LABEL: fsub_constant_sel_constants_v4f32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GCN-NEXT: s_mov_b32 s3, 0x41500000
-; GCN-NEXT: v_mov_b32_e32 v4, 0
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_bitcmp1_b32 s2, 0
-; GCN-NEXT: s_cselect_b32 s2, s3, 0x40c00000
-; GCN-NEXT: s_cselect_b32 s3, 0x41100000, 4.0
-; GCN-NEXT: s_cselect_b32 s4, 0x40a00000, 2.0
-; GCN-NEXT: s_cselect_b32 s5, 1.0, 0
-; GCN-NEXT: v_mov_b32_e32 v0, s5
-; GCN-NEXT: v_mov_b32_e32 v1, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s3
-; GCN-NEXT: v_mov_b32_e32 v3, s2
-; GCN-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: fsub_constant_sel_constants_v4f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9-NEXT: s_mov_b32 s3, 0x41500000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_bitcmp1_b32 s2, 0
+; GFX9-NEXT: s_cselect_b32 s2, s3, 0x40c00000
+; GFX9-NEXT: s_cselect_b32 s3, 0x41100000, 4.0
+; GFX9-NEXT: s_cselect_b32 s4, 0x40a00000, 2.0
+; GFX9-NEXT: s_cselect_b32 s5, 1.0, 0
+; GFX9-NEXT: v_mov_b32_e32 v0, s5
+; GFX9-NEXT: v_mov_b32_e32 v1, s4
+; GFX9-NEXT: v_mov_b32_e32 v2, s3
+; GFX9-NEXT: v_mov_b32_e32 v3, s2
+; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX942-LABEL: fsub_constant_sel_constants_v4f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0x41500000
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_bitcmp1_b32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, s3, 0x40c00000
+; GFX942-NEXT: s_cselect_b32 s3, 0x41100000, 4.0
+; GFX942-NEXT: s_cselect_b32 s4, 0x40a00000, 2.0
+; GFX942-NEXT: s_cselect_b32 s5, 1.0, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, s5
+; GFX942-NEXT: v_mov_b32_e32 v3, s4
+; GFX942-NEXT: v_mov_b32_e32 v4, s3
+; GFX942-NEXT: v_mov_b32_e32 v5, s2
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1]
+; GFX942-NEXT: s_endpgm
%sel = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
%bo = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %sel
store <4 x float> %bo, ptr addrspace(1) %p, align 32
diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll
index 58cfd40113be2..21390003ee565 100644
--- a/llvm/test/CodeGen/AMDGPU/imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx942 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
; Use a 64-bit value with lo bits that can be represented as an inline constant
define amdgpu_kernel void @i64_imm_inline_lo(ptr addrspace(1) %out) {
@@ -25,6 +26,17 @@ define amdgpu_kernel void @i64_imm_inline_lo(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: i64_imm_inline_lo:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 5
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x12345678
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
entry:
store i64 1311768464867721221, ptr addrspace(1) %out ; 0x1234567800000005
ret void
@@ -53,6 +65,17 @@ define amdgpu_kernel void @i64_imm_inline_hi(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: i64_imm_inline_hi:
+; GFX942: ; %bb.0: ; %entry
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x12345678
+; GFX942-NEXT: v_mov_b32_e32 v1, 5
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
entry:
store i64 21780256376, ptr addrspace(1) %out ; 0x0000000512345678
ret void
@@ -80,6 +103,17 @@ define amdgpu_kernel void @store_imm_neg_0.0_i64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_imm_neg_0.0_i64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_bfrev_b32_e32 v1, 1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store i64 -9223372036854775808, ptr addrspace(1) %out
ret void
}
@@ -104,6 +138,16 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_neg_0.0_i32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_bfrev_b32_e32 v0, 1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store i32 -2147483648, ptr addrspace(1) %out
ret void
}
@@ -128,6 +172,16 @@ define amdgpu_kernel void @store_inline_imm_0.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_0.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 0.0, ptr addrspace(1) %out
ret void
}
@@ -152,6 +206,16 @@ define amdgpu_kernel void @store_imm_neg_0.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_imm_neg_0.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_bfrev_b32_e32 v0, 1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float -0.0, ptr addrspace(1) %out
ret void
}
@@ -176,6 +240,16 @@ define amdgpu_kernel void @store_inline_imm_0.5_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_0.5_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0.5
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 0.5, ptr addrspace(1) %out
ret void
}
@@ -200,6 +274,16 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_0.5_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, -0.5
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float -0.5, ptr addrspace(1) %out
ret void
}
@@ -224,6 +308,16 @@ define amdgpu_kernel void @store_inline_imm_1.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_1.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 1.0, ptr addrspace(1) %out
ret void
}
@@ -248,6 +342,16 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_1.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, -1.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float -1.0, ptr addrspace(1) %out
ret void
}
@@ -272,6 +376,16 @@ define amdgpu_kernel void @store_inline_imm_2.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_2.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 2.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 2.0, ptr addrspace(1) %out
ret void
}
@@ -296,6 +410,16 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_2.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, -2.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float -2.0, ptr addrspace(1) %out
ret void
}
@@ -320,6 +444,16 @@ define amdgpu_kernel void @store_inline_imm_4.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_4.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 4.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 4.0, ptr addrspace(1) %out
ret void
}
@@ -344,6 +478,16 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_4.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, -4.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float -4.0, ptr addrspace(1) %out
ret void
}
@@ -368,6 +512,16 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_inv_2pi_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0.15915494
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 0x3FC45F3060000000, ptr addrspace(1) %out
ret void
}
@@ -392,6 +546,16 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(ptr addrspace(1) %out)
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_inv_2pi_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0xbe22f983
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 0xBFC45F3060000000, ptr addrspace(1) %out
ret void
}
@@ -416,6 +580,16 @@ define amdgpu_kernel void @store_literal_imm_f32(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_literal_imm_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x45800000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store float 4096.0, ptr addrspace(1) %out
ret void
}
@@ -442,6 +616,17 @@ define amdgpu_kernel void @add_inline_imm_0.0_f32(ptr addrspace(1) %out, float %
; VI-NEXT: v_add_f32_e64 v0, s6, 0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_0.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0.0
store float %y, ptr addrspace(1) %out
ret void
@@ -469,6 +654,17 @@ define amdgpu_kernel void @add_inline_imm_0.5_f32(ptr addrspace(1) %out, float %
; VI-NEXT: v_add_f32_e64 v0, s6, 0.5
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_0.5_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 0.5
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0.5
store float %y, ptr addrspace(1) %out
ret void
@@ -496,6 +692,17 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(ptr addrspace(1) %out, flo
; VI-NEXT: v_add_f32_e64 v0, s6, -0.5
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_0.5_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, -0.5
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, -0.5
store float %y, ptr addrspace(1) %out
ret void
@@ -523,6 +730,17 @@ define amdgpu_kernel void @add_inline_imm_1.0_f32(ptr addrspace(1) %out, float %
; VI-NEXT: v_add_f32_e64 v0, s6, 1.0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_1.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 1.0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 1.0
store float %y, ptr addrspace(1) %out
ret void
@@ -550,6 +768,17 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(ptr addrspace(1) %out, flo
; VI-NEXT: v_add_f32_e64 v0, s6, -1.0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_1.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, -1.0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, -1.0
store float %y, ptr addrspace(1) %out
ret void
@@ -577,6 +806,17 @@ define amdgpu_kernel void @add_inline_imm_2.0_f32(ptr addrspace(1) %out, float %
; VI-NEXT: v_add_f32_e64 v0, s6, 2.0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_2.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 2.0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 2.0
store float %y, ptr addrspace(1) %out
ret void
@@ -604,6 +844,17 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(ptr addrspace(1) %out, flo
; VI-NEXT: v_add_f32_e64 v0, s6, -2.0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_2.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, -2.0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, -2.0
store float %y, ptr addrspace(1) %out
ret void
@@ -631,6 +882,17 @@ define amdgpu_kernel void @add_inline_imm_4.0_f32(ptr addrspace(1) %out, float %
; VI-NEXT: v_add_f32_e64 v0, s6, 4.0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_4.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 4.0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 4.0
store float %y, ptr addrspace(1) %out
ret void
@@ -658,6 +920,17 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(ptr addrspace(1) %out, flo
; VI-NEXT: v_add_f32_e64 v0, s6, -4.0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_4.0_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, -4.0
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, -4.0
store float %y, ptr addrspace(1) %out
ret void
@@ -699,6 +972,24 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(ptr addrspace(1) %out,
; VI-NEXT: v_add_f32_e32 v0, 0.5, v0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: commute_add_inline_imm_0.5_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s7, 0xf000
+; GFX942-NEXT: s_mov_b32 s6, -1
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; GFX942-NEXT: s_mov_b32 s4, s0
+; GFX942-NEXT: s_mov_b32 s5, s1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_add_f32_e32 v0, 0.5, v0
+; GFX942-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX942-NEXT: s_endpgm
%x = load float, ptr addrspace(1) %in
%y = fadd float %x, 0.5
store float %y, ptr addrspace(1) %out
@@ -741,6 +1032,24 @@ define amdgpu_kernel void @commute_add_literal_f32(ptr addrspace(1) %out, ptr ad
; VI-NEXT: v_add_f32_e32 v0, 0x44800000, v0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: commute_add_literal_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s7, 0xf000
+; GFX942-NEXT: s_mov_b32 s6, -1
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: buffer_load_dword v0, off, s[8:11], 0
+; GFX942-NEXT: s_mov_b32 s4, s0
+; GFX942-NEXT: s_mov_b32 s5, s1
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_add_f32_e32 v0, 0x44800000, v0
+; GFX942-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX942-NEXT: s_endpgm
%x = load float, ptr addrspace(1) %in
%y = fadd float %x, 1024.0
store float %y, ptr addrspace(1) %out
@@ -769,6 +1078,17 @@ define amdgpu_kernel void @add_inline_imm_1_f32(ptr addrspace(1) %out, float %x)
; VI-NEXT: v_add_f32_e64 v0, s6, 1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_1_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 1
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0x36a0000000000000
store float %y, ptr addrspace(1) %out
ret void
@@ -796,6 +1116,17 @@ define amdgpu_kernel void @add_inline_imm_2_f32(ptr addrspace(1) %out, float %x)
; VI-NEXT: v_add_f32_e64 v0, s6, 2
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_2_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 2
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0x36b0000000000000
store float %y, ptr addrspace(1) %out
ret void
@@ -823,6 +1154,17 @@ define amdgpu_kernel void @add_inline_imm_16_f32(ptr addrspace(1) %out, float %x
; VI-NEXT: v_add_f32_e64 v0, s6, 16
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_16_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 16
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0x36e0000000000000
store float %y, ptr addrspace(1) %out
ret void
@@ -852,6 +1194,18 @@ define amdgpu_kernel void @add_inline_imm_neg_1_f32(ptr addrspace(1) %out, float
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_1_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_add_i32 s4, s6, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, s4
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%xbc = bitcast float %x to i32
%y = add i32 %xbc, -1
%ybc = bitcast i32 %y to float
@@ -883,6 +1237,18 @@ define amdgpu_kernel void @add_inline_imm_neg_2_f32(ptr addrspace(1) %out, float
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_2_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_add_i32 s4, s6, -2
+; GFX942-NEXT: v_mov_b32_e32 v0, s4
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%xbc = bitcast float %x to i32
%y = add i32 %xbc, -2
%ybc = bitcast i32 %y to float
@@ -914,6 +1280,18 @@ define amdgpu_kernel void @add_inline_imm_neg_16_f32(ptr addrspace(1) %out, floa
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_16_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_add_i32 s4, s6, -16
+; GFX942-NEXT: v_mov_b32_e32 v0, s4
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%xbc = bitcast float %x to i32
%y = add i32 %xbc, -16
%ybc = bitcast i32 %y to float
@@ -943,6 +1321,17 @@ define amdgpu_kernel void @add_inline_imm_63_f32(ptr addrspace(1) %out, float %x
; VI-NEXT: v_add_f32_e64 v0, s6, 63
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_63_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 63
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0x36ff800000000000
store float %y, ptr addrspace(1) %out
ret void
@@ -970,6 +1359,17 @@ define amdgpu_kernel void @add_inline_imm_64_f32(ptr addrspace(1) %out, float %x
; VI-NEXT: v_add_f32_e64 v0, s6, 64
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_64_f32:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f32_e64 v0, s6, 64
+; GFX942-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd float %x, 0x3700000000000000
store float %y, ptr addrspace(1) %out
ret void
@@ -999,6 +1399,17 @@ define amdgpu_kernel void @add_inline_imm_0.0_f64(ptr addrspace(1) %out, [8 x i3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_0.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1028,6 +1439,17 @@ define amdgpu_kernel void @add_inline_imm_0.5_f64(ptr addrspace(1) %out, [8 x i3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_0.5_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 0.5
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0.5
store double %y, ptr addrspace(1) %out
ret void
@@ -1057,6 +1479,17 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(ptr addrspace(1) %out, [8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_0.5_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], -0.5
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, -0.5
store double %y, ptr addrspace(1) %out
ret void
@@ -1086,6 +1519,17 @@ define amdgpu_kernel void @add_inline_imm_1.0_f64(ptr addrspace(1) %out, [8 x i3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_1.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 1.0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 1.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1115,6 +1559,17 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(ptr addrspace(1) %out, [8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_1.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], -1.0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, -1.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1144,6 +1599,17 @@ define amdgpu_kernel void @add_inline_imm_2.0_f64(ptr addrspace(1) %out, [8 x i3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_2.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 2.0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 2.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1173,6 +1639,17 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(ptr addrspace(1) %out, [8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_2.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], -2.0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, -2.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1202,6 +1679,17 @@ define amdgpu_kernel void @add_inline_imm_4.0_f64(ptr addrspace(1) %out, [8 x i3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_4.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 4.0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 4.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1231,6 +1719,17 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(ptr addrspace(1) %out, [8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_4.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], -4.0
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, -4.0
store double %y, ptr addrspace(1) %out
ret void
@@ -1262,6 +1761,17 @@ define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(ptr addrspace(1) %out, [8
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_inv_2pi_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 0.15915494309189532
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0x3fc45f306dc9c882
store double %y, ptr addrspace(1) %out
ret void
@@ -1295,6 +1805,19 @@ define amdgpu_kernel void @add_m_inv_2pi_f64(ptr addrspace(1) %out, [8 x i32], d
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_m_inv_2pi_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
+; GFX942-NEXT: v_mov_b32_e32 v1, 0xbfc45f30
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], v[0:1]
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0xbfc45f306dc9c882
store double %y, ptr addrspace(1) %out
ret void
@@ -1324,6 +1847,17 @@ define amdgpu_kernel void @add_inline_imm_1_f64(ptr addrspace(1) %out, [8 x i32]
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_1_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 1
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0x0000000000000001
store double %y, ptr addrspace(1) %out
ret void
@@ -1353,6 +1887,17 @@ define amdgpu_kernel void @add_inline_imm_2_f64(ptr addrspace(1) %out, [8 x i32]
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_2_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 2
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0x0000000000000002
store double %y, ptr addrspace(1) %out
ret void
@@ -1382,6 +1927,17 @@ define amdgpu_kernel void @add_inline_imm_16_f64(ptr addrspace(1) %out, [8 x i32
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_16_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 16
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0x0000000000000010
store double %y, ptr addrspace(1) %out
ret void
@@ -1409,6 +1965,17 @@ define amdgpu_kernel void @add_inline_imm_neg_1_f64(ptr addrspace(1) %out, [8 x
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_1_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: v_mov_b32_e32 v0, -1
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0xffffffffffffffff
store double %y, ptr addrspace(1) %out
ret void
@@ -1436,6 +2003,17 @@ define amdgpu_kernel void @add_inline_imm_neg_2_f64(ptr addrspace(1) %out, [8 x
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_2_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, -2
+; GFX942-NEXT: v_mov_b32_e32 v1, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0xfffffffffffffffe
store double %y, ptr addrspace(1) %out
ret void
@@ -1463,6 +2041,17 @@ define amdgpu_kernel void @add_inline_imm_neg_16_f64(ptr addrspace(1) %out, [8 x
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_neg_16_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, -16
+; GFX942-NEXT: v_mov_b32_e32 v1, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0xfffffffffffffff0
store double %y, ptr addrspace(1) %out
ret void
@@ -1492,6 +2081,17 @@ define amdgpu_kernel void @add_inline_imm_63_f64(ptr addrspace(1) %out, [8 x i32
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_63_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 63
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0x000000000000003F
store double %y, ptr addrspace(1) %out
ret void
@@ -1521,6 +2121,17 @@ define amdgpu_kernel void @add_inline_imm_64_f64(ptr addrspace(1) %out, [8 x i32
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: add_inline_imm_64_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_add_f64 v[0:1], s[6:7], 64
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
%y = fadd double %x, 0x0000000000000040
store double %y, ptr addrspace(1) %out
ret void
@@ -1548,6 +2159,17 @@ define amdgpu_kernel void @store_inline_imm_0.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_0.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 0.0, ptr addrspace(1) %out
ret void
}
@@ -1574,6 +2196,17 @@ define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(ptr addrspace(1) %out)
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_literal_imm_neg_0.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_bfrev_b32_e32 v1, 1
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double -0.0, ptr addrspace(1) %out
ret void
}
@@ -1600,6 +2233,17 @@ define amdgpu_kernel void @store_inline_imm_0.5_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_0.5_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x3fe00000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 0.5, ptr addrspace(1) %out
ret void
}
@@ -1626,6 +2270,17 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_0.5_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0xbfe00000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double -0.5, ptr addrspace(1) %out
ret void
}
@@ -1652,6 +2307,17 @@ define amdgpu_kernel void @store_inline_imm_1.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_1.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x3ff00000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 1.0, ptr addrspace(1) %out
ret void
}
@@ -1678,6 +2344,17 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_1.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0xbff00000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double -1.0, ptr addrspace(1) %out
ret void
}
@@ -1704,6 +2381,17 @@ define amdgpu_kernel void @store_inline_imm_2.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_2.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 2.0, ptr addrspace(1) %out
ret void
}
@@ -1730,6 +2418,17 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_2.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, -2.0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double -2.0, ptr addrspace(1) %out
ret void
}
@@ -1756,6 +2455,17 @@ define amdgpu_kernel void @store_inline_imm_4.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_4.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x40100000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 4.0, ptr addrspace(1) %out
ret void
}
@@ -1782,6 +2492,17 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_4.0_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0xc0100000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double -4.0, ptr addrspace(1) %out
ret void
}
@@ -1808,6 +2529,17 @@ define amdgpu_kernel void @store_inv_2pi_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inv_2pi_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x3fc45f30
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 0x3fc45f306dc9c882, ptr addrspace(1) %out
ret void
}
@@ -1834,6 +2566,17 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(ptr addrspace(1) %out)
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_inline_imm_m_inv_2pi_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
+; GFX942-NEXT: v_mov_b32_e32 v1, 0xbfc45f30
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 0xbfc45f306dc9c882, ptr addrspace(1) %out
ret void
}
@@ -1860,6 +2603,17 @@ define amdgpu_kernel void @store_literal_imm_f64(ptr addrspace(1) %out) {
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: store_literal_imm_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-NEXT: s_mov_b32 s3, 0xf000
+; GFX942-NEXT: s_mov_b32 s2, -1
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0x40b00000
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX942-NEXT: s_endpgm
store double 4096.0, ptr addrspace(1) %out
ret void
}
@@ -1871,6 +2625,13 @@ define amdgpu_vs void @literal_folding(float %arg) {
; GCN-NEXT: v_mul_f32_e32 v0, 0xbf4353f8, v0
; GCN-NEXT: exp pos0 v1, v1, v0, v0 done
; GCN-NEXT: s_endpgm
+;
+; GFX942-LABEL: literal_folding:
+; GFX942: ; %bb.0: ; %main_body
+; GFX942-NEXT: v_mul_f32_e32 v1, 0x3f4353f8, v0
+; GFX942-NEXT: v_mul_f32_e32 v0, 0xbf4353f8, v0
+; GFX942-NEXT: exp pos0 v1, v1, v0, v0 done
+; GFX942-NEXT: s_endpgm
main_body:
%tmp = fmul float %arg, 0x3FE86A7F00000000
%tmp1 = fmul float %arg, 0xBFE86A7F00000000
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 6a45b961a61c8..101787abf8ea7 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -32,6 +33,16 @@ define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: sint_to_fp_i32_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%result = sitofp i32 %in to double
store double %result, ptr addrspace(1) %out
ret void
@@ -73,6 +84,18 @@ define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) {
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: sint_to_fp_i1_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%fp = sitofp i1 %cmp to double
store double %fp, ptr addrspace(1) %out, align 4
@@ -113,6 +136,19 @@ define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: sint_to_fp_i1_f64_load:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_bitcmp1_b32 s2, 0
+; GFX942-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[2:3]
+; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%fp = sitofp i1 %in to double
store double %fp, ptr addrspace(1) %out, align 8
ret void
@@ -150,6 +186,18 @@ define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %i
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_sint_to_fp_i64_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], s3
+; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s2
+; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%result = sitofp i64 %in to double
store double %result, ptr addrspace(1) %out
ret void
@@ -199,6 +247,22 @@ define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr ad
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: v_sint_to_fp_i64_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_cvt_f64_i32_e32 v[2:3], v1
+; GFX942-NEXT: v_ldexp_f64 v[2:3], v[2:3], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
+; GFX942-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%val = load i64, ptr addrspace(1) %gep, align 8
@@ -238,6 +302,17 @@ define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_sint_to_fp_i8_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_sext_i32_i8 s2, s2
+; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], s2
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%fp = sitofp i8 %in to double
store double %fp, ptr addrspace(1) %out
ret void
@@ -258,6 +333,14 @@ define double @v_sint_to_fp_i8_to_f64(i8 %in) {
; VI-NEXT: v_bfe_i32 v0, v0, 0, 16
; VI-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_sint_to_fp_i8_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX942-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX942-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%fp = sitofp i8 %in to double
ret double %fp
}
@@ -296,6 +379,18 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_select_sint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double -1.0, double 0.0
store double %select, ptr addrspace(1) %out, align 8
@@ -313,6 +408,18 @@ define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_select_sint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0xbff00000
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double -1.0, double 0.0
store double %select, ptr addrspace(1) %out, align 8
@@ -353,6 +460,18 @@ define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_select_sint_to_fp_i1_vals_i64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0xbff00000, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
store i64 %select, ptr addrspace(1) %out, align 8
@@ -370,6 +489,18 @@ define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_select_sint_to_fp_i1_vals_i64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0xbff00000
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
store i64 %select, ptr addrspace(1) %out, align 8
@@ -388,6 +519,18 @@ define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0xbff00000
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e64 v5, v3, 0, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double -1.0
store double %select, ptr addrspace(1) %out, align 8
@@ -429,6 +572,18 @@ define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0, 0xbff00000
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double -1.0
store double %select, ptr addrspace(1) %out, align 8
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index ab278c3b63a3e..983acfc2c0699 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -48,6 +49,22 @@ define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr ad
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: v_uint_to_fp_i64_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-NEXT: v_lshlrev_b32_e32 v0, 3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], v1
+; GFX942-NEXT: v_ldexp_f64 v[2:3], v[2:3], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
+; GFX942-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1]
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
%val = load i64, ptr addrspace(1) %gep, align 8
@@ -88,6 +105,18 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %i
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_i64_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s3
+; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s2
+; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3]
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cast = uitofp i64 %in to double
store double %cast, ptr addrspace(1) %out, align 8
ret void
@@ -136,6 +165,23 @@ define amdgpu_kernel void @s_uint_to_fp_v2i64_to_v2f64(ptr addrspace(1) %out, <2
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_v2i64_to_v2f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s3
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s2
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s1
+; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
+; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
+; GFX942-NEXT: v_ldexp_f64 v[0:1], v[4:5], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s0
+; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[6:7]
+; GFX942-NEXT: s_endpgm
%cast = uitofp <2 x i64> %in to <2 x double>
store <2 x double> %cast, ptr addrspace(1) %out, align 16
ret void
@@ -210,6 +256,32 @@ define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4
; VI-NEXT: flat_store_dwordx4 v[10:11], v[4:7]
; VI-NEXT: flat_store_dwordx4 v[8:9], v[0:3]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_v4i64_to_v4f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s11
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s10
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s9
+; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32
+; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
+; GFX942-NEXT: v_ldexp_f64 v[0:1], v[4:5], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s8
+; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s15
+; GFX942-NEXT: v_ldexp_f64 v[4:5], v[4:5], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s14
+; GFX942-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7]
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s13
+; GFX942-NEXT: v_ldexp_f64 v[4:5], v[4:5], 32
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[8:9], s12
+; GFX942-NEXT: v_add_f64 v[4:5], v[4:5], v[8:9]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: s_endpgm
%cast = uitofp <4 x i64> %in to <4 x double>
store <4 x double> %cast, ptr addrspace(1) %out, align 16
ret void
@@ -243,6 +315,16 @@ define amdgpu_kernel void @s_uint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %i
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_i32_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s2
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cast = uitofp i32 %in to double
store double %cast, ptr addrspace(1) %out, align 8
ret void
@@ -262,6 +344,16 @@ define amdgpu_kernel void @s_uint_to_fp_v2i32_to_v2f64(ptr addrspace(1) %out, <2
; GCN-NEXT: v_mov_b32_e32 v4, s0
; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_v2i32_to_v2f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s3
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s2
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_endpgm
%cast = uitofp <2 x i32> %in to <2 x double>
store <2 x double> %cast, ptr addrspace(1) %out, align 16
ret void
@@ -313,6 +405,20 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4
; VI-NEXT: v_mov_b32_e32 v5, s5
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_v4i32_to_v4f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s3
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s2
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s1
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7]
+; GFX942-NEXT: s_endpgm
%cast = uitofp <4 x i32> %in to <4 x double>
store <4 x double> %cast, ptr addrspace(1) %out, align 16
ret void
@@ -354,6 +460,18 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64(ptr addrspace(1) %out, i32 %in)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: uint_to_fp_i1_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to double
store double %fp, ptr addrspace(1) %out, align 4
@@ -394,6 +512,19 @@ define amdgpu_kernel void @uint_to_fp_i1_to_f64_load(ptr addrspace(1) %out, i1 %
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: uint_to_fp_i1_to_f64_load:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_bitcmp1_b32 s2, 0
+; GFX942-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%fp = uitofp i1 %in to double
store double %fp, ptr addrspace(1) %out, align 8
ret void
@@ -429,6 +560,17 @@ define amdgpu_kernel void @s_uint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_uint_to_fp_i8_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_and_b32 s2, s2, 0xff
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s2
+; GFX942-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%fp = uitofp i8 %in to double
store double %fp, ptr addrspace(1) %out
ret void
@@ -450,6 +592,14 @@ define double @v_uint_to_fp_i8_to_f64(i8 %in) {
; VI-NEXT: v_and_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; VI-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_uint_to_fp_i8_to_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_mov_b32 s0, 0xffff
+; GFX942-NEXT: v_and_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%fp = uitofp i8 %in to double
ret double %fp
}
@@ -488,6 +638,18 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_select_uint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 1.0, double 0.0
store double %select, ptr addrspace(1) %out, align 8
@@ -505,6 +667,18 @@ define void @v_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_select_uint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0x3ff00000
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 1.0, double 0.0
store double %select, ptr addrspace(1) %out, align 8
@@ -545,6 +719,18 @@ define amdgpu_kernel void @s_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_select_uint_to_fp_i1_vals_i64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0x3ff00000, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
store i64 %select, ptr addrspace(1) %out, align 8
@@ -562,6 +748,18 @@ define void @v_select_uint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_select_uint_to_fp_i1_vals_i64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0x3ff00000
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
store i64 %select, ptr addrspace(1) %out, align 8
@@ -603,6 +801,18 @@ define amdgpu_kernel void @s_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1)
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
+;
+; GFX942-LABEL: s_swap_select_uint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_load_dword s2, s[4:5], 0x8
+; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-NEXT: s_cmp_eq_u32 s2, 0
+; GFX942-NEXT: s_cselect_b32 s2, 0, 0x3ff00000
+; GFX942-NEXT: v_mov_b32_e32 v1, s2
+; GFX942-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
+; GFX942-NEXT: s_endpgm
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double 1.0
store double %select, ptr addrspace(1) %out, align 8
@@ -620,6 +830,18 @@ define void @v_swap_select_uint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in
; GCN-NEXT: flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: v_swap_select_uint_to_fp_i1_vals_f64:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0x3ff00000
+; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_cndmask_b32_e64 v5, v3, 0, vcc
+; GFX942-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq i32 %in, 0
%select = select i1 %cmp, double 0.0, double 1.0
store double %select, ptr addrspace(1) %out, align 8
More information about the llvm-commits
mailing list