[llvm] [AMDGPU] adjust tests to prevent fpclass bitcast folding (PR #106268)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 11:35:56 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Alex MacLean (AlexMaclean)
<details>
<summary>Changes</summary>
Make some minor tweaks to AMDGPU tests to ensure they still work as intended after https://github.com/llvm/llvm-project/pull/97762. Without these tweaks, the tests could be radically simplified by bitcast-aware fpclass deduction and would no longer test what they were written for.
---
Full diff: https://github.com/llvm/llvm-project/pull/106268.diff
2 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/anyext.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (+6-6)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll
index 1f8da18cdd3014..8b6c8be9f37882 100644
--- a/llvm/test/CodeGen/AMDGPU/anyext.ll
+++ b/llvm/test/CodeGen/AMDGPU/anyext.ll
@@ -152,7 +152,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: buffer_load_ushort v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v0, 0x8000, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0x8001, v0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -164,7 +164,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], 0
-; GFX8-NEXT: v_mov_b32_e32 v1, 0x8000
+; GFX8-NEXT: v_mov_b32_e32 v1, 0x8001
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
@@ -179,7 +179,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_and_b32_e32 v0, 0x80008000, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0x80018001, v0
; GFX9-NEXT: v_bfi_b32 v0, v1, 0, v0
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
@@ -188,7 +188,7 @@ define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 {
bb:
%tmp = load i16, ptr addrspace(1) undef, align 2
%tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 1
- %tmp4 = and <2 x i16> %tmp2, <i16 -32768, i16 -32768>
+ %tmp4 = and <2 x i16> %tmp2, <i16 -32767, i16 -32767>
%tmp5 = zext <2 x i16> %tmp4 to <2 x i32>
%tmp6 = shl nuw <2 x i32> %tmp5, <i32 16, i32 16>
%tmp7 = or <2 x i32> zeroinitializer, %tmp6
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index 63ccaafeda88f4..98b17bbaa0a959 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -1553,10 +1553,10 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_cmp_nlt_f32_e64 s[0:1], s0, 0
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GCN-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cmp_nge_f32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc
; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_endpgm
;
@@ -1567,11 +1567,11 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
; GFX11-NEXT: v_cmp_nlt_f32_e64 s0, s0, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cmp_nge_f32_e32 vcc_lo, 0, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e64 v0, -v0, v1
-; GFX11-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0, v0
+; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0, v0
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX11-NEXT: s_endpgm
.entry:
@@ -1579,11 +1579,11 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
%.i2379 = select i1 %i, i32 1, i32 0
%.i0436 = bitcast i32 %.i2379 to float
%.i0440 = fneg float %.i0436
- %i1 = fcmp uge float %.i0436, 0.000000e+00
+ %i1 = fcmp ugt float %.i0436, 0.000000e+00
%.i2495 = select i1 %i1, i32 %.i2379, i32 0
%.i0552 = bitcast i32 %.i2495 to float
%.i0592 = fmul float %.i0440, %.i0552
- %.i0721 = fcmp ogt float %.i0592, 0.000000e+00
+ %.i0721 = fcmp oge float %.i0592, 0.000000e+00
br i1 %.i0721, label %bb5, label %bb
bb: ; preds = %.entry
``````````
</details>
https://github.com/llvm/llvm-project/pull/106268
More information about the llvm-commits mailing list