[llvm] eaa3ef9 - [DAG] Propagate OrZero and DemandedElts for min/max in isKnownToBeAPowerOfTwo (#182369)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 04:50:16 PDT 2026
Author: dibrinsofor
Date: 2026-04-02T12:50:11+01:00
New Revision: eaa3ef9ddc20edbe6a99be7ea53d0d2ea0585f77
URL: https://github.com/llvm/llvm-project/commit/eaa3ef9ddc20edbe6a99be7ea53d0d2ea0585f77
DIFF: https://github.com/llvm/llvm-project/commit/eaa3ef9ddc20edbe6a99be7ea53d0d2ea0585f77.diff
LOG: [DAG] Propagate OrZero and DemandedElts for min/max in isKnownToBeAPowerOfTwo (#182369)
Fixes #181643
For queries like `isKnownToBeAPowerOfTwo(V, OrZero=true)`, if an operand
is known to be "pow2-or-zero" but not strictly a non-zero power of two,
the min/max case currently returns false even when the result remains
pow2-or-zero.
For instance:
- `A = select cond, 4, 0` (A is pow2-or-zero)
- `R = umin(A, 16)`
`R` is always in `{0, 4}`, so querying `isKnownToBeAPowerOfTwo(R,
OrZero=true)` should return true.
Added unit tests for the baseline and failing cases; the min/max case now
propagates `OrZero` and `DemandedElts` to its operand queries.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/known-pow2.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2ff17655ed788..60b1fc6a5166b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4821,9 +4821,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
- return isKnownToBeAPowerOfTwo(Val.getOperand(1), /*OrZero=*/false,
+ return isKnownToBeAPowerOfTwo(Val.getOperand(1), DemandedElts, OrZero,
Depth + 1) &&
- isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
Depth + 1);
case ISD::SELECT:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 5c395ec423505..629d7d9e4ae65 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -352,7 +352,6 @@ define i1 @pow2_umin(i32 %x, i32 %y) {
define i32 @pow2_umin_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-LABEL: pow2_umin_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: pslld $23, %xmm0
; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
@@ -371,10 +370,9 @@ define i32 @pow2_umin_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: por %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, (%rsi)
-; CHECK-NEXT: movd %xmm1, %ecx
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divl %ecx
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 -1, i32 -1, i32 -1>, %x
%d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %yy, <4 x i32> splat (i32 256))
@@ -452,7 +450,6 @@ define i1 @pow2_umax(i32 %x, i32 %y, i32 %z) {
define i32 @pow2_umax_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-LABEL: pow2_umax_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,3,3,3,4,5,6,7]
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4096,4294967295,4294967295,4294967295]
; CHECK-NEXT: movdqa %xmm2, %xmm3
@@ -476,10 +473,9 @@ define i32 @pow2_umax_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: orps %xmm4, %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsi)
-; CHECK-NEXT: movd %xmm0, %ecx
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divl %ecx
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%yy = lshr <4 x i32> <i32 4096, i32 -1, i32 -1, i32 -1>, %x
%d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %yy, <4 x i32> splat (i32 256))
@@ -560,7 +556,6 @@ define i1 @pow2_smin(i32 %x, i32 %y) {
define i32 @pow2_smin_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-LABEL: pow2_smin_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: pslld $23, %xmm0
; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
@@ -578,10 +573,9 @@ define i32 @pow2_smin_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-NEXT: pandn %xmm1, %xmm2
; CHECK-NEXT: por %xmm0, %xmm2
; CHECK-NEXT: movdqa %xmm2, (%rsi)
-; CHECK-NEXT: movd %xmm2, %ecx
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divl %ecx
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: movd %xmm2, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%yy = shl <4 x i32> <i32 1, i32 -1, i32 -1, i32 -1>, %x
%d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %yy, <4 x i32> splat (i32 256))
@@ -659,7 +653,6 @@ define i1 @pow2_smax(i32 %x, i32 %y, i32 %z) {
define i32 @pow2_smax_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-LABEL: pow2_smax_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,3,3,3,4,5,6,7]
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4096,4294967295,4294967295,4294967295]
; CHECK-NEXT: movdqa %xmm2, %xmm3
@@ -683,10 +676,9 @@ define i32 @pow2_smax_vec(<4 x i32> %x, <4 x i32> %y, i32 %z, ptr %p) {
; CHECK-NEXT: pandn %xmm0, %xmm1
; CHECK-NEXT: por %xmm4, %xmm1
; CHECK-NEXT: movdqa %xmm1, (%rsi)
-; CHECK-NEXT: movd %xmm1, %ecx
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divl %ecx
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%yy = lshr <4 x i32> <i32 4096, i32 -1, i32 -1, i32 -1>, %x
%d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %yy, <4 x i32> splat (i32 256))
More information about the llvm-commits
mailing list