[llvm] [SelectionDAG] Use getAllOnes to evaluate isKnownNeverZero (PR #92923)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 21 08:14:19 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: AtariDreams (AtariDreams)
<details>
<summary>Changes</summary>
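This check was ported from ValueTracking to SelectionDAG. In `SelectionDAG::isKnownNeverZero`, the shift handling now also returns true when all of the bits that would be shifted out are known to be zero; per the comment added in the patch, if the shifted value is known non-zero then at least one non-zero bit must remain.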
---
Patch is 28.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92923.diff
8 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+8-2)
- (modified) llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (+3-5)
- (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+13-21)
- (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+18-35)
- (modified) llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll (+7-70)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll (-4)
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+11-55)
- (modified) llvm/test/CodeGen/X86/known-pow2.ll (+19-25)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 777bbf071732e..78ba282e20a58 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5485,8 +5485,14 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return true;
// If max shift cnt of known ones is non-zero, result is non-zero.
APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
- if (MaxCnt.ult(ValKnown.getBitWidth()) &&
- !ValKnown.One.shl(MaxCnt).isZero())
+ unsigned NumBits = ValKnown.getBitWidth();
+ if (MaxCnt.ult(NumBits) && !ValKnown.One.shl(MaxCnt).isZero())
+ return true;
+
+ // If all of the bits shifted out are known to be zero, and Val is known
+ // non-zero then at least one non-zero bit must remain.
+ if (ValKnown.Zero.lshr(NumBits - MaxCnt)
+ .eq(APInt::getAllOnes(NumBits).lshr(NumBits - MaxCnt)))
return true;
break;
}
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index a78addc490086..f34dfbd48a3a6 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -495,11 +495,9 @@ define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nou
define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #8 // =0x8
-; CHECK-NEXT: fmov s1, #-9.00000000
-; CHECK-NEXT: lsl x8, x8, x0
-; CHECK-NEXT: ucvtf s0, x8
-; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: mov w8, #-1081081856 // =0xbf900000
+; CHECK-NEXT: sub w8, w8, w0, lsl #23
+; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
%shl = shl i64 8, %cnt
%conv = uitofp i64 %shl to float
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 756b819099682..4314605b2b9f4 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -804,7 +804,6 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_with_select(ptr addrspace(1) no
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_ffbh_u32_e32 v0, v0
-; SI-NEXT: v_min_u32_e32 v0, 32, v0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@@ -839,8 +838,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_with_select(ptr addrspace(1) no
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_or_b32_e32 v0, v2, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: v_ffbh_u32_e32 v0, v0
-; VI-NEXT: v_min_u32_e32 v2, 32, v0
+; VI-NEXT: v_ffbh_u32_e32 v2, v0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
@@ -850,7 +848,7 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_with_select(ptr addrspace(1) no
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -863,10 +861,9 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i32_with_select(ptr addrspace(1) no
; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
-; EG-NEXT: FFBH_UINT * T1.W, PV.W,
-; EG-NEXT: CNDE_INT T0.X, T0.W, literal.x, PV.W,
-; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
+; EG-NEXT: FFBH_UINT T0.X, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX9-GISEL-LABEL: v_ctlz_zero_undef_i32_with_select:
; GFX9-GISEL: ; %bb.0:
@@ -940,7 +937,6 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_with_select(ptr addrspace(1) no
; SI-NEXT: v_ffbh_u32_e32 v0, v0
; SI-NEXT: v_add_i32_e32 v1, vcc, 32, v1
; SI-NEXT: v_min_u32_e32 v0, v1, v0
-; SI-NEXT: v_min_u32_e32 v0, 64, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
@@ -1011,7 +1007,6 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_with_select(ptr addrspace(1) no
; VI-NEXT: v_add_u32_e32 v0, vcc, 32, v0
; VI-NEXT: v_min_u32_e32 v0, v0, v3
; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_min_u32_e32 v0, 64, v0
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
@@ -1020,27 +1015,24 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i64_with_select(ptr addrspace(1) no
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 15, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 12, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 6, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 6, #1
; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
-; EG-NEXT: FFBH_UINT T1.W, PV.W,
-; EG-NEXT: LSHL * T2.W, T3.X, literal.x,
+; EG-NEXT: LSHL T0.W, T1.X, literal.x,
+; EG-NEXT: LSHL * T1.W, T0.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T0.W, T0.W, literal.x, PV.W,
-; EG-NEXT: OR_INT * T1.W, PS, T2.X,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T3.X,
+; EG-NEXT: FFBH_UINT T0.W, PV.W,
+; EG-NEXT: OR_INT * T1.W, T1.W, T2.X,
; EG-NEXT: FFBH_UINT T2.W, PS,
; EG-NEXT: ADD_INT * T0.W, PV.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 392a44318b0a5..32de87357f86f 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -770,7 +770,6 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(ptr addrspace(1) no
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_ffbl_b32_e32 v0, v0
-; SI-NEXT: v_min_u32_e32 v0, 32, v0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
@@ -805,8 +804,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(ptr addrspace(1) no
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_or_b32_e32 v0, v2, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: v_ffbl_b32_e32 v0, v0
-; VI-NEXT: v_min_u32_e32 v2, 32, v0
+; VI-NEXT: v_ffbl_b32_e32 v2, v0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
@@ -816,7 +814,7 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(ptr addrspace(1) no
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -829,10 +827,9 @@ define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(ptr addrspace(1) no
; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
-; EG-NEXT: FFBL_INT * T1.W, PV.W,
-; EG-NEXT: CNDE_INT T0.X, T0.W, literal.x, PV.W,
-; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
-; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45)
+; EG-NEXT: FFBL_INT T0.X, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; GFX9-GISEL-LABEL: v_cttz_zero_undef_i32_with_select:
; GFX9-GISEL: ; %bb.0:
@@ -906,7 +903,6 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) no
; SI-NEXT: v_ffbl_b32_e32 v0, v0
; SI-NEXT: v_add_i32_e32 v0, vcc, 32, v0
; SI-NEXT: v_min_u32_e32 v0, v0, v1
-; SI-NEXT: v_min_u32_e32 v0, 64, v0
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
@@ -977,7 +973,6 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) no
; VI-NEXT: v_ffbl_b32_e32 v0, v0
; VI-NEXT: v_min_u32_e32 v0, v3, v0
; VI-NEXT: v_mov_b32_e32 v3, s1
-; VI-NEXT: v_min_u32_e32 v0, 64, v0
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
@@ -986,27 +981,24 @@ define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(ptr addrspace(1) no
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 3 @6
-; EG-NEXT: ALU 15, @15, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 12, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 6, #1
; EG-NEXT: VTX_READ_16 T2.X, T0.X, 0, #1
-; EG-NEXT: VTX_READ_16 T3.X, T0.X, 2, #1
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 4, #1
+; EG-NEXT: VTX_READ_16 T3.X, T0.X, 4, #1
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
; EG-NEXT: ALU clause starting at 14:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 15:
-; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
-; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
-; EG-NEXT: FFBL_INT T1.W, PV.W,
-; EG-NEXT: LSHL * T2.W, T3.X, literal.x,
+; EG-NEXT: LSHL T0.W, T1.X, literal.x,
+; EG-NEXT: LSHL * T1.W, T0.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
-; EG-NEXT: CNDE_INT T0.W, T0.W, literal.x, PV.W,
-; EG-NEXT: OR_INT * T1.W, PS, T2.X,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT * T0.W, PV.W, T3.X,
+; EG-NEXT: FFBL_INT T0.W, PV.W,
+; EG-NEXT: OR_INT * T1.W, T1.W, T2.X,
; EG-NEXT: FFBL_INT T2.W, PS,
; EG-NEXT: ADD_INT * T0.W, PV.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
@@ -1130,7 +1122,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 8, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -1144,8 +1136,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(ptr addrspace(1) noalias %out,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
; EG-NEXT: FFBL_INT * T1.W, PV.W,
-; EG-NEXT: CNDE_INT * T1.W, T0.W, literal.x, PV.W,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.X, T0.W, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45)
@@ -1252,7 +1242,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out,
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 8, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 6, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -1266,8 +1256,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(ptr addrspace(1) noalias %out,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
; EG-NEXT: FFBL_INT * T1.W, PV.W,
-; EG-NEXT: CNDE_INT * T1.W, T0.W, literal.x, PV.W,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.X, T0.W, literal.x, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: -1(nan), 2(2.802597e-45)
@@ -1330,7 +1318,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v0, v1, v0
; SI-NEXT: v_ffbl_b32_e32 v0, v0
-; SI-NEXT: v_min_u32_e32 v0, 32, v0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0
; SI-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
@@ -1368,7 +1355,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; VI-NEXT: v_or_b32_e32 v0, v2, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0
; VI-NEXT: v_ffbl_b32_e32 v0, v0
-; VI-NEXT: v_min_u32_e32 v0, 32, v0
; VI-NEXT: v_cmp_ne_u32_e32 vcc, 32, v0
; VI-NEXT: v_cndmask_b32_e32 v2, -1, v0, vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
@@ -1380,7 +1366,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 1 @6
-; EG-NEXT: ALU 10, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 8, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
@@ -1393,9 +1379,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: OR_INT * T0.W, PV.W, T0.X,
-; EG-NEXT: FFBL_INT * T1.W, PV.W,
-; EG-NEXT: CNDE_INT * T0.W, T0.W, literal.x, PV.W,
-; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
+; EG-NEXT: FFBL_INT * T0.W, PV.W,
; EG-NEXT: SETNE_INT * T1.W, PV.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W,
@@ -1559,8 +1543,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_or_b32_e32 v0, v2, v0
-; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0
-; VI-NEXT: v_ffbl_b32_e32 v2, v2
+; VI-NEXT: v_ffbl_b32_e32 v2, v0
; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0
; VI-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; VI-NEXT: v_mov_b32_e32 v0, s0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
index 7c5f6d5e33efe..afa797730294f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -2249,86 +2249,23 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; VI-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_lshlrev_b64 v[0:1], v0, 8
-; VI-NEXT: s_mov_b32 s6, 0xc1100000
-; VI-NEXT: v_ffbh_u32_e32 v2, v1
-; VI-NEXT: v_min_u32_e32 v2, 32, v2
-; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; VI-NEXT: v_min_u32_e32 v0, 1, v0
-; VI-NEXT: v_or_b32_e32 v0, v1, v0
-; VI-NEXT: v_cvt_f32_u32_e32 v0, v0
-; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
-; VI-NEXT: v_ldexp_f32 v0, v0, v1
-; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6
-; VI-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6
-; VI-NEXT: v_rcp_f32_e32 v3, v1
-; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0
-; VI-NEXT: v_fma_f32 v3, v4, v3, v3
-; VI-NEXT: v_mul_f32_e32 v4, v2, v3
-; VI-NEXT: v_fma_f32 v5, -v1, v4, v2
-; VI-NEXT: v_fma_f32 v4, v5, v3, v4
-; VI-NEXT: v_fma_f32 v1, -v1, v4, v2
-; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4
-; VI-NEXT: v_div_fixup_f32 v0, v1, v0, s6
+; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0
+; VI-NEXT: v_sub_u32_e32 v0, vcc, 0xbf900000, v0
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 8
-; GFX10-NEXT: v_ffbh_u32_e32 v2, v1
-; GFX10-NEXT: v_min_u32_e32 v2, 32, v2
-; GFX10-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GFX10-NEXT: v_min_u32_e32 v0, 1, v0
-; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX10-NEXT: v_sub_nc_u32_e32 v1, 32, v2
-; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, 0xc1100000
-; GFX10-NEXT: v_rcp_f32_e32 v2, v1
-; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0
-; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2
-; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, 0xc1100000, v0, 0xc1100000
-; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2
-; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3
-; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2
-; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3
-; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4
-; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, 0xc1100000
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 23, v0
+; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0xbf900000, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 8
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_clz_i32_u32_e32 v2, v1
-; GFX11-NEXT: v_min_u32_e32 v2, 32, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GFX11-NEXT: v_min_u32_e32 v0, 1, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX11-NEXT: v_sub_nc_u32_e32 v1, 32, v2
-; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 0xc1100000
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_rcp_f32_e32 v2, v1
-; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
-; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
-; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 0xc1100000, v0, 0xc1100000
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2
-; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2
-; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4
-; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 0xc1100000
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 23, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0xbf900000, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%shl = shl i64 8, %cnt
%conv = uitofp i64 %shl to float
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll
index 9d6e0927b0dfd..d86c23e98c19f 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll
@@ -119,7 +119,6 @@ define amdgpu_kernel void @ctlz_i64(ptr addrspace(1) noalias %out, ptr addrspace
; GFX9-NEXT: v_ffbh_u32_e32 v0, v0
; GFX9-NEXT: v_add_u32_e64 v2, v2, 32 clamp
; GFX9-NEXT: v_min_u32_e32 v0, v2, v0
-; GFX9-NEXT: v_min_u32_e32 v0, 64, v0
; GFX9-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
;
@@ -156,7 +155,6 @@ define amdgpu_kernel void @ctlz_i64(ptr addrspace(1) noalias %out, ptr addrspace
; GFX10-NEXT: v_ffbh_u32_e32 v0, v0
; GFX10-NEXT: v_add_nc_u32_e64 v2, v2, 32 clamp
; GFX10-NEXT: v_mi...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/92923
More information about the llvm-commits mailing list