[llvm] d53699c - AMDGPU: Add some regression tests that infinite-looped the combiner

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 4 04:22:20 PST 2023


Author: Matt Arsenault
Date: 2023-02-04T08:21:18-04:00
New Revision: d53699cc4570b18e41dbda4a2ee643a8db5ef66f

URL: https://github.com/llvm/llvm-project/commit/d53699cc4570b18e41dbda4a2ee643a8db5ef66f
DIFF: https://github.com/llvm/llvm-project/commit/d53699cc4570b18e41dbda4a2ee643a8db5ef66f.diff

LOG: AMDGPU: Add some regression tests that infinite-looped the combiner

Prevent a future patch from reintroducing an infinite combine loop between the fneg and select combines.
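
For context on the failure mode: each test below builds the shape
select(c, 0.0, fneg(select(c, 0.0, x))), where a combine that pushes the
fneg into the select and a combine that hoists it back out would undo
each other forever. The standalone C++ sketch below is a toy model of
that ping-pong, not the actual DAGCombiner code; the rewrite rules and
names are illustrative assumptions only.

// Toy model of the hazard these tests pin down: two locally-reasonable
// rewrites that invert each other, so a naive run-until-no-change
// combiner loop never reaches a fixed point.
#include <cstdio>
#include <string>

// Hypothetical rules, for illustration:
//   Rule A: fneg(select(c, 0, x))  -> select(c, -0, fneg(x))
//   Rule B: select(c, -0, fneg(x)) -> fneg(select(c, 0, x))
static std::string combineOnce(const std::string &E) {
  if (E == "fneg(select(c, 0, x))")
    return "select(c, -0, fneg(x))"; // Rule A fires.
  if (E == "select(c, -0, fneg(x))")
    return "fneg(select(c, 0, x))";  // Rule B fires: undoes Rule A.
  return E;                          // No rule applies: fixed point.
}

int main() {
  std::string E = "fneg(select(c, 0, x))";
  // A real combiner re-runs rules until none fires; cap the iterations
  // here so the demo halts and the cycle stays visible.
  for (int I = 0; I < 6; ++I) {
    std::string Next = combineOnce(E);
    if (Next == E) {
      std::printf("fixed point after %d steps\n", I);
      return 0; // The normal, terminating exit.
    }
    std::printf("step %d: %s -> %s\n", I, E.c_str(), Next.c_str());
    E = Next;
  }
  std::printf("iteration cap hit: the rules form an infinite loop\n");
  return 0;
}

The usual fix is to make one direction canonical, or to refuse a rewrite
that recreates an existing node; the generated CHECK lines below pin the
current, terminating output so a reintroduced loop is caught immediately.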

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 3e066cfc81d6..cc32c25dab5b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -2780,11 +2780,452 @@ define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x ha
   ret <2 x half> %add
 }
 
+; --------------------------------------------------------------------------------
+; select tests
+; --------------------------------------------------------------------------------
+
+define amdgpu_kernel void @s_fneg_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fneg_select_infloop_regression_f32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    v_bfrev_b32_e32 v0, 1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitcmp1_b32 s1, 0
+; SI-NEXT:    v_mov_b32_e32 v1, s0
+; SI-NEXT:    s_cselect_b64 vcc, -1, 0
+; SI-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    flat_store_dword v[0:1], v2
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fneg_select_infloop_regression_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_bfrev_b32_e32 v0, 1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_bitcmp1_b32 s1, 0
+; VI-NEXT:    v_mov_b32_e32 v1, s0
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; VI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, float 0.0, float %arg
+  %i2 = fneg float %i
+  %i3 = select i1 %arg1, float 0.0, float %i2
+  store float %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_bfrev_b32_e32 v1, 1
+; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, float 0.0, float %arg
+  %i2 = fneg float %i
+  %i3 = select i1 %arg1, float 0.0, float %i2
+  ret float %i3
+}
+
+define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fneg_select_infloop_regression_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitcmp1_b32 s4, 0
+; SI-NEXT:    s_cselect_b32 s3, 0, s3
+; SI-NEXT:    s_cselect_b32 s2, 0, s2
+; SI-NEXT:    s_cselect_b32 s3, 0x80000000, s3
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    s_xor_b32 s2, s3, 0x80000000
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s2
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fneg_select_infloop_regression_f64:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_bitcmp1_b32 s4, 0
+; VI-NEXT:    s_cselect_b32 s3, 0, s3
+; VI-NEXT:    s_cselect_b32 s2, 0, s2
+; VI-NEXT:    s_cselect_b32 s3, 0x80000000, s3
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    s_xor_b32 s2, s3, 0x80000000
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, double 0.0, double %arg
+  %i2 = fneg double %i
+  %i3 = select i1 %arg1, double 0.0, double %i2
+  store double %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_bfrev_b32_e32 v2, 1
+; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, double 0.0, double %arg
+  %i2 = fneg double %i
+  %i3 = select i1 %arg1, double 0.0, double %i2
+  ret double %i3
+}
+
+define amdgpu_kernel void @s_fneg_select_infloop_regression_f16(half %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fneg_select_infloop_regression_f16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s2, s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SI-NEXT:    v_bfrev_b32_e32 v1, 1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
+; SI-NEXT:    s_bitcmp1_b32 s2, 16
+; SI-NEXT:    s_cselect_b64 vcc, -1, 0
+; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT:    v_cvt_f16_f32_e64 v2, -v0
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    v_mov_b32_e32 v1, s1
+; SI-NEXT:    flat_store_short v[0:1], v2
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fneg_select_infloop_regression_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s2, s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; VI-NEXT:    v_mov_b32_e32 v0, 0x8000
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_bitcmp1_b32 s2, 16
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
+; VI-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    flat_store_short v[0:1], v2
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, half 0.0, half %arg
+  %i2 = fneg half %i
+  %i3 = select i1 %arg1, half 0.0, half %i2
+  store half %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) {
+; SI-LABEL: v_fneg_select_infloop_regression_f16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SI-NEXT:    v_and_b32_e32 v1, 1, v1
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; SI-NEXT:    v_bfrev_b32_e32 v1, 1
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_select_infloop_regression_f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_and_b32_e32 v1, 1, v1
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    v_mov_b32_e32 v1, 0x8000
+; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, half 0.0, half %arg
+  %i2 = fneg half %i
+  %i3 = select i1 %arg1, half 0.0, half %i2
+  ret half %i3
+}
+
+define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fneg_select_infloop_regression_v2f16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_and_b32 s1, 1, s1
+; SI-NEXT:    s_cselect_b32 s0, 0, s0
+; SI-NEXT:    s_xor_b32 s0, s0, 0x80008000
+; SI-NEXT:    s_cmp_eq_u32 s1, 1
+; SI-NEXT:    s_cselect_b32 s0, 0, s0
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    flat_store_dword v[0:1], v2
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fneg_select_infloop_regression_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_and_b32 s1, 1, s1
+; VI-NEXT:    s_cselect_b32 s0, 0, s0
+; VI-NEXT:    s_xor_b32 s0, s0, 0x80008000
+; VI-NEXT:    s_cmp_eq_u32 s1, 1
+; VI-NEXT:    s_cselect_b32 s0, 0, s0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
+  %i2 = fneg <2 x half> %i
+  %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2
+  store <2 x half> %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1) {
+; SI-LABEL: v_fneg_select_infloop_regression_v2f16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; SI-NEXT:    v_or_b32_e32 v0, v0, v1
+; SI-NEXT:    v_and_b32_e32 v1, 1, v2
+; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; SI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, vcc
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_fneg_select_infloop_regression_v2f16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_and_b32_e32 v1, 1, v1
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
+  %i2 = fneg <2 x half> %i
+  %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2
+  ret <2 x half> %i3
+}
+
+define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fneg_select_infloop_regression_v2f32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_and_b32 s4, 1, s4
+; SI-NEXT:    s_cselect_b32 s2, 0, s2
+; SI-NEXT:    s_xor_b32 s2, s2, 0x80000000
+; SI-NEXT:    s_cmp_eq_u32 s4, 1
+; SI-NEXT:    s_cselect_b32 s3, 0, s3
+; SI-NEXT:    s_cselect_b32 s2, 0, s2
+; SI-NEXT:    s_xor_b32 s3, s3, 0x80000000
+; SI-NEXT:    s_cmp_eq_u32 s4, 1
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    s_cselect_b32 s2, 0, s3
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    v_mov_b32_e32 v1, s2
+; SI-NEXT:    v_mov_b32_e32 v2, s0
+; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fneg_select_infloop_regression_v2f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_and_b32 s4, 1, s4
+; VI-NEXT:    s_cselect_b32 s2, 0, s2
+; VI-NEXT:    s_xor_b32 s2, s2, 0x80000000
+; VI-NEXT:    s_cmp_eq_u32 s4, 1
+; VI-NEXT:    s_cselect_b32 s3, 0, s3
+; VI-NEXT:    s_cselect_b32 s2, 0, s2
+; VI-NEXT:    s_xor_b32 s3, s3, 0x80000000
+; VI-NEXT:    s_cmp_eq_u32 s4, 1
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    s_cselect_b32 s2, 0, s3
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s2
+; VI-NEXT:    v_mov_b32_e32 v2, s0
+; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
+  %i2 = fneg <2 x float> %i
+  %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2
+  store <2 x float> %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_select_infloop_regression_v2f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
+  %i2 = fneg <2 x float> %i
+  %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2
+  ret <2 x float> %i3
+}
+
+define amdgpu_kernel void @s_fabs_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fabs_select_infloop_regression_f32:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitcmp1_b32 s1, 0
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
+; SI-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v0
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    flat_store_dword v[0:1], v2
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fabs_select_infloop_regression_f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_bitcmp1_b32 s1, 0
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
+; VI-NEXT:    v_and_b32_e32 v2, 0x7fffffff, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, float 0.0, float %arg
+  %i2 = call float @llvm.fabs.f32(float %i)
+  %i3 = select i1 %arg1, float 0.0, float %i2
+  store float %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fabs_select_infloop_regression_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, float 0.0, float %arg
+  %i2 = call float @llvm.fabs.f32(float %i)
+  %i3 = select i1 %arg1, float 0.0, float %i2
+  ret float %i3
+}
+
+define amdgpu_kernel void @s_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
+; SI-LABEL: s_fneg_fabs_select_infloop_regression:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SI-NEXT:    v_bfrev_b32_e32 v0, 1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitcmp1_b32 s1, 0
+; SI-NEXT:    v_mov_b32_e32 v1, s0
+; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; SI-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; SI-NEXT:    v_cndmask_b32_e64 v0, |v1|, v0, s[0:1]
+; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    flat_store_dword v[0:1], v2
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: s_fneg_fabs_select_infloop_regression:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    v_bfrev_b32_e32 v0, 1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_bitcmp1_b32 s1, 0
+; VI-NEXT:    v_mov_b32_e32 v1, s0
+; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; VI-NEXT:    v_cndmask_b32_e64 v1, v1, 0, s[0:1]
+; VI-NEXT:    v_cndmask_b32_e64 v0, |v1|, v0, s[0:1]
+; VI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+  %i = select i1 %arg1, float 0.0, float %arg
+  %i2 = call float @llvm.fabs.f32(float %i)
+  %neg.i2 = fneg float %i2
+  %i3 = select i1 %arg1, float 0.0, float %neg.i2
+  store float %i3, ptr addrspace(1) %ptr, align 4
+  ret void
+}
+
+define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
+; GCN-LABEL: v_fneg_fabs_select_infloop_regression:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
+; GCN-NEXT:    v_bfrev_b32_e32 v1, 1
+; GCN-NEXT:    v_cndmask_b32_e64 v0, |v0|, v1, vcc
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %i = select i1 %arg1, float 0.0, float %arg
+  %i2 = call float @llvm.fabs.f32(float %i)
+  %neg.i2 = fneg float %i2
+  %i3 = select i1 %arg1, float 0.0, float %neg.i2
+  ret float %i3
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @llvm.fma.f32(float, float, float) #1
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
+declare float @llvm.fabs.f32(float) #1
 declare float @llvm.sin.f32(float) #1
 declare float @llvm.trunc.f32(float) #1
 declare float @llvm.round.f32(float) #1