[llvm] [AMDGPU] fcanonicalize.bf16.ll - regenerate test checks (PR #161026)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 27 14:39:40 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
---
Patch is 111.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161026.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll (+950-895)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll
index a4cdb0387df9a..d747fb7cce7dc 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.bf16.ll
@@ -15,67 +15,67 @@ declare <32 x bfloat> @llvm.canonicalize.v32bf16(<32 x bfloat>) #0
declare <64 x bfloat> @llvm.canonicalize.v64bf16(<64 x bfloat>) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
-; GFX1250-LABEL: test_fold_canonicalize_undef_value_bf16:
-; GFX1250: %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v0, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @test_fold_canonicalize_undef_value_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: test_fold_canonicalize_undef_value_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat undef)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: v_test_canonicalize_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v0, v0, s[0:1]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: global_store_b16 v[0:1], v0, off
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @v_test_canonicalize_var_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: v_test_canonicalize_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_u16 v0, v0, s[0:1]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250-NEXT: global_store_b16 v[0:1], v0, off
+; GFX1250-NEXT: s_endpgm
%val = load bfloat, ptr addrspace(1) %out
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val)
store bfloat %canonicalized, ptr addrspace(1) poison
ret void
}
-; GFX1250-LABEL: s_test_canonicalize_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v1, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_lshl_b32 s2, s2, 16
-; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_max_num_f32_e64 v0, s2, s2
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: global_store_b16 v1, v0, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @s_test_canonicalize_var_bf16(ptr addrspace(1) %out, i16 zeroext %val.arg) #1 {
+; GFX1250-LABEL: s_test_canonicalize_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_lshl_b32 s2, s2, 16
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_max_num_f32_e64 v0, s2, s2
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250-NEXT: global_store_b16 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
%val = bitcast i16 %val.arg to bfloat
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: v_test_canonicalize_build_vector_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
define <2 x bfloat> @v_test_canonicalize_build_vector_v2bf16(bfloat %lo, bfloat %hi) #1 {
+; GFX1250-LABEL: v_test_canonicalize_build_vector_v2bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_dual_max_num_f32 v1, v1, v1 :: v_dual_max_num_f32 v0, v0, v0
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%ins0 = insertelement <2 x bfloat> poison, bfloat %lo, i32 0
%ins1 = insertelement <2 x bfloat> %ins0, bfloat %hi, i32 1
%canonicalized = call <2 x bfloat> @llvm.canonicalize.v2bf16(<2 x bfloat> %ins1)
@@ -83,22 +83,22 @@ define <2 x bfloat> @v_test_canonicalize_build_vector_v2bf16(bfloat %lo, bfloat
}
-; GFX1250-LABEL: v_test_canonicalize_fabs_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0x7fff, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @v_test_canonicalize_fabs_var_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: v_test_canonicalize_fabs_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_and_b32_e32 v1, 0x7fff, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%val = load bfloat, ptr addrspace(1) %out
%val.fabs = call bfloat @llvm.fabs.bf16(bfloat %val)
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val.fabs)
@@ -107,22 +107,22 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_bf16(ptr addrspace(1) %o
}
-; GFX1250-LABEL: v_test_canonicalize_fneg_fabs_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_or_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: v_test_canonicalize_fneg_fabs_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_or_b32_e32 v1, 0x8000, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%val = load bfloat, ptr addrspace(1) %out
%val.fabs = call bfloat @llvm.fabs.bf16(bfloat %val)
%val.fabs.fneg = fneg bfloat %val.fabs
@@ -131,22 +131,22 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_bf16(ptr addrspace(
ret void
}
-; GFX1250-LABEL: v_test_canonicalize_fneg_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @v_test_canonicalize_fneg_var_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: v_test_canonicalize_fneg_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%val = load bfloat, ptr addrspace(1) %out
%val.fneg = fneg bfloat %val
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val.fneg)
@@ -154,22 +154,22 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_bf16(ptr addrspace(1) %o
ret void
}
-; GFX1250-LABEL: v_test_no_denormals_canonicalize_fneg_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_bf16(ptr addrspace(1) %out) #2 {
+; GFX1250-LABEL: v_test_no_denormals_canonicalize_fneg_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%val = load bfloat, ptr addrspace(1) %out
%val.fneg = fneg bfloat %val
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat %val.fneg)
@@ -177,22 +177,22 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_bf16(ptr ad
ret void
}
-; GFX1250-LABEL: v_test_no_denormals_canonicalize_fneg_fabs_var_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_or_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_bf16(ptr addrspace(1) %out) #2 {
+; GFX1250-LABEL: v_test_no_denormals_canonicalize_fneg_fabs_var_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_u16 v1, v0, s[0:1]
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: v_or_b32_e32 v1, 0x8000, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX1250-NEXT: v_max_num_f32_e32 v1, v1, v1
+; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%val = load bfloat, ptr addrspace(1) %out
%val.fabs = call bfloat @llvm.fabs.bf16(bfloat %val)
%val.fabs.fneg = fneg bfloat %val.fabs
@@ -201,217 +201,231 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_bf16(p
ret void
}
+define amdgpu_kernel void @test_fold_canonicalize_p0_bf16(ptr addrspace(1) %out) #1 {
; GFX1250-LABEL: test_fold_canonicalize_p0_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v0, s[0:1]
-; GFX1250-NEXT: s_endpgm
- define amdgpu_kernel void @test_fold_canonicalize_p0_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 0.0)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: test_fold_canonicalize_n0_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffff8000
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
-; GFX1250-NEXT: .Lfunc_end10:
+
define amdgpu_kernel void @test_fold_canonicalize_n0_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: test_fold_canonicalize_n0_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffff8000
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -0.0)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: test_fold_canonicalize_p1_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3f80
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
+
define amdgpu_kernel void @test_fold_canonicalize_p1_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: test_fold_canonicalize_p1_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3f80
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 1.0)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: test_fold_canonicalize_n1_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffffbf80
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
-; GFX1250-NEXT: .Lfunc_end12:
+
define amdgpu_kernel void @test_fold_canonicalize_n1_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: test_fold_canonicalize_n1_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0xffffbf80
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -1.0)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: test_fold_canonicalize_literal_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x4180
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
+
define amdgpu_kernel void @test_fold_canonicalize_literal_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: test_fold_canonicalize_literal_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x4180
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 16.0)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: test_default_denormals_fold_canonicalize_denormal0_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
+
define amdgpu_kernel void @test_default_denormals_fold_canonicalize_denormal0_bf16(ptr addrspace(1) %out) #1 {
+; GFX1250-LABEL: test_default_denormals_fold_canonicalize_denormal0_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3ff
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
%canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 0xR03FF)
store bfloat %canonicalized, ptr addrspace(1) %out
ret void
}
-; GFX1250-LABEL: test_denormals_fold_canonicalize_denormal0_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/161026
More information about the llvm-commits
mailing list