[llvm] AMDGPU: Add baseline test for nofpclass on call results (PR #167263)
via llvm-commits
llvm-commits@lists.llvm.org
Sun Nov 9 21:04:51 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/167263.diff
1 File Affected:
- (added) llvm/test/CodeGen/AMDGPU/nofpclass-call.ll (+199)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
new file mode 100644
index 0000000000000..1861f02ec8b1c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/nofpclass-call.ll
@@ -0,0 +1,199 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Check that nofpclass attributes on call results are used in
+; SelectionDAG.
+
+define internal float @func_f32(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: func_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_load_dword v0, v[0:1], off glc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %ld = load volatile float, ptr addrspace(1) %ptr
+ ret float %ld
+}
+
+define float @call_nofpclass_funcs_f32(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: call_nofpclass_funcs_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s18, s33
+; CHECK-NEXT: s_mov_b32 s33, s32
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
+; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
+; CHECK-NEXT: s_addk_i32 s32, 0x400
+; CHECK-NEXT: v_writelane_b32 v4, s30, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, func_f32@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, func_f32@rel32@hi+12
+; CHECK-NEXT: v_writelane_b32 v4, s31, 1
+; CHECK-NEXT: v_mov_b32_e32 v2, v0
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: v_mov_b32_e32 v3, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, v2
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: v_max_f32_e32 v1, v3, v3
+; CHECK-NEXT: v_max_f32_e32 v0, v0, v0
+; CHECK-NEXT: v_min_f32_e32 v0, v1, v0
+; CHECK-NEXT: v_readlane_b32 s31, v4, 1
+; CHECK-NEXT: v_readlane_b32 s30, v4, 0
+; CHECK-NEXT: s_mov_b32 s32, s33
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s33 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b32 s33, s18
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %call0 = call nofpclass(nan) float @func_f32(ptr addrspace(1) %ptr)
+ %call1 = call nofpclass(nan) float @func_f32(ptr addrspace(1) %ptr)
+ %min = call float @llvm.minnum.f32(float %call0, float %call1)
+ ret float %min
+}
+
+define internal <2 x float> @func_v2f32(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: func_v2f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %ld = load volatile <2 x float>, ptr addrspace(1) %ptr
+ ret <2 x float> %ld
+}
+
+define <2 x float> @call_nofpclass_funcs_v2f32(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: call_nofpclass_funcs_v2f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s18, s33
+; CHECK-NEXT: s_mov_b32 s33, s32
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
+; CHECK-NEXT: s_addk_i32 s32, 0x400
+; CHECK-NEXT: v_writelane_b32 v6, s30, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12
+; CHECK-NEXT: v_writelane_b32 v6, s31, 1
+; CHECK-NEXT: v_mov_b32_e32 v2, v1
+; CHECK-NEXT: v_mov_b32_e32 v3, v0
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: v_mov_b32_e32 v4, v0
+; CHECK-NEXT: v_mov_b32_e32 v5, v1
+; CHECK-NEXT: v_mov_b32_e32 v0, v3
+; CHECK-NEXT: v_mov_b32_e32 v1, v2
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: v_max_f32_e32 v2, v4, v4
+; CHECK-NEXT: v_max_f32_e32 v0, v0, v0
+; CHECK-NEXT: v_min_f32_e32 v0, v2, v0
+; CHECK-NEXT: v_max_f32_e32 v2, v5, v5
+; CHECK-NEXT: v_max_f32_e32 v1, v1, v1
+; CHECK-NEXT: v_min_f32_e32 v1, v2, v1
+; CHECK-NEXT: v_readlane_b32 s31, v6, 1
+; CHECK-NEXT: v_readlane_b32 s30, v6, 0
+; CHECK-NEXT: s_mov_b32 s32, s33
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b32 s33, s18
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %call0 = call nofpclass(nan) <2 x float> @func_v2f32(ptr addrspace(1) %ptr)
+ %call1 = call nofpclass(nan) <2 x float> @func_v2f32(ptr addrspace(1) %ptr)
+ %min = call <2 x float> @llvm.minnum.v2f32(<2 x float> %call0, <2 x float> %call1)
+ ret <2 x float> %min
+}
+
+define internal double @func_f64(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: func_f64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_load_dwordx2 v[0:1], v[0:1], off glc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %ld = load volatile double, ptr addrspace(1) %ptr
+ ret double %ld
+}
+
+define double @call_nofpclass_funcs_f64(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: call_nofpclass_funcs_f64:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: s_mov_b32 s18, s33
+; CHECK-NEXT: s_mov_b32 s33, s32
+; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT: s_mov_b64 exec, s[16:17]
+; CHECK-NEXT: s_addk_i32 s32, 0x400
+; CHECK-NEXT: v_writelane_b32 v6, s30, 0
+; CHECK-NEXT: s_getpc_b64 s[16:17]
+; CHECK-NEXT: s_add_u32 s16, s16, func_f64@rel32@lo+4
+; CHECK-NEXT: s_addc_u32 s17, s17, func_f64@rel32@hi+12
+; CHECK-NEXT: v_writelane_b32 v6, s31, 1
+; CHECK-NEXT: v_mov_b32_e32 v4, v1
+; CHECK-NEXT: v_mov_b32_e32 v5, v0
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: v_mov_b32_e32 v2, v0
+; CHECK-NEXT: v_mov_b32_e32 v3, v1
+; CHECK-NEXT: v_mov_b32_e32 v0, v5
+; CHECK-NEXT: v_mov_b32_e32 v1, v4
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; CHECK-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; CHECK-NEXT: v_readlane_b32 s31, v6, 1
+; CHECK-NEXT: v_readlane_b32 s30, v6, 0
+; CHECK-NEXT: s_mov_b32 s32, s33
+; CHECK-NEXT: v_min_f64 v[0:1], v[2:3], v[0:1]
+; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: s_mov_b32 s33, s18
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %call0 = call nofpclass(nan) double @func_f64(ptr addrspace(1) %ptr)
+ %call1 = call nofpclass(nan) double @func_f64(ptr addrspace(1) %ptr)
+ %min = call double @llvm.minnum.f64(double %call0, double %call1)
+ ret double %min
+}
+
+define float @call_nofpclass_intrinsic_f32(float %x, float %y, float %z) {
+; CHECK-LABEL: call_nofpclass_intrinsic_f32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_sqrt_f32_e32 v0, v0
+; CHECK-NEXT: v_sqrt_f32_e32 v1, v1
+; CHECK-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %call0 = call nofpclass(nan) float @llvm.amdgcn.sqrt.f32(float %x)
+ %call1 = call nofpclass(nan) float @llvm.amdgcn.sqrt.f32(float %y)
+ %lt = fcmp olt float %call0, %call1
+ %min = select nsz i1 %lt, float %call0, float %call1
+ ret float %min
+}
+
+define <2 x half> @call_nofpclass_intrinsic_v2f16(float %x, float %y, float %z, float %w) {
+; CHECK-LABEL: call_nofpclass_intrinsic_v2f16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v1
+; CHECK-NEXT: v_cvt_pkrtz_f16_f32 v1, v2, v3
+; CHECK-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; CHECK-NEXT: v_cmp_lt_f16_e32 vcc, v0, v1
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
+; CHECK-NEXT: v_cmp_lt_f16_e32 vcc, v3, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
+; CHECK-NEXT: s_mov_b32 s4, 0x5040100
+; CHECK-NEXT: v_perm_b32 v0, v1, v0, s4
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %call0 = call nofpclass(nan) <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
+ %call1 = call nofpclass(nan) <2 x half> @llvm.amdgcn.cvt.pkrtz(float %z, float %w)
+ %lt = fcmp olt <2 x half> %call0, %call1
+ %min = select nsz <2 x i1> %lt, <2 x half> %call0, <2 x half> %call1
+ ret <2 x half> %min
+}
``````````
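
For context, the pattern exercised by the `call_nofpclass_funcs_*` tests reduces to the minimal sketch below (`@ext` is an illustrative placeholder, not part of the patch). The paired `v_max_* v, v` instructions in the baseline checks are the quieting canonicalizations that `minnum` lowering inserts for possibly-signaling-NaN inputs; `nofpclass(nan)` on both call results guarantees the inputs are never NaN, which is the information a follow-up SelectionDAG change could use to drop them.

```llvm
; Minimal sketch of the tested pattern (assumes a hypothetical
; external callee @ext; names are illustrative, not from the patch).
declare float @ext()
declare float @llvm.minnum.f32(float, float)

define float @min_of_nonnan_calls() {
  ; nofpclass(nan) on each call site asserts the returned value is
  ; never a NaN (quiet or signaling), so the canonicalizing
  ; v_max_f32 v, v pairs in the baseline checks become redundant
  ; once SelectionDAG consumes the attribute.
  %a = call nofpclass(nan) float @ext()
  %b = call nofpclass(nan) float @ext()
  %min = call float @llvm.minnum.f32(float %a, float %b)
  ret float %min
}
```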
</details>
https://github.com/llvm/llvm-project/pull/167263