[llvm] [DAG] isKnownToBeAPowerOfTwo - Power of 2 value is known to be power of 2 after BSWAP/BITREVERSE (PR #182207)
Manuel Dun via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 06:21:49 PST 2026
https://github.com/manueldun updated https://github.com/llvm/llvm-project/pull/182207
>From b0697006456eeba909d127fe6ccd1f9848a6cbe6 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Fri, 27 Feb 2026 19:59:45 -0400
Subject: [PATCH 1/5] [DAG] isKnownToBeAPowerOfTwo function now handles BSwap
and BitReverse
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/test/CodeGen/X86/known-pow2.ll | 84 +++++++++++++++++++
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4a2bd811b5214..17330665c822f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4732,7 +4732,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
case ISD::ROTL:
case ISD::ROTR:
- return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ case ISD::BSWAP:
+ case ISD::BITREVERSE:
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
Depth + 1);
case ISD::SMIN:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 55d418b12f80f..2f5ef7dad1161 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -910,6 +910,46 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
ret i1 %bool
}
+define i1 @pow2_bswap(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bswap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 4, i32 2
+ %d = call i32 @llvm.bswap.i32(i32 %y)
+ %and = and i32 %x, %d
+ %r = icmp eq i32 %and, %d
+ ret i1 %r
+}
+
+define i1 @pow2_bitreverse(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bitreverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shll $6, %eax
+; CHECK-NEXT: leal (%rax,%rcx,4), %ecx
+; CHECK-NEXT: andl $268435456, %ecx # imm = 0x10000000
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: leal (%rax,%rcx,2), %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 4, i32 2
+ %d = call i32 @llvm.bitreverse.i32(i32 %y)
+ %and = and i32 %x, %d
+ %r = icmp eq i32 %and, %d
+ ret i1 %r
+}
+
; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_add_fail:
@@ -943,6 +983,50 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
ret i32 %r
}
+
+; Test that (X + Y) & Y --> ~X & Y when Y = rotl(bswap(c ? 0 : 2), 3) (pow2-or-zero).
+define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
+; CHECK-LABEL: pow2_rotl_orzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: notb %sil
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: addl %eax, %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: roll $3, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 0, i32 2
+ %d = call i32 @llvm.bswap.i32(i32 %y)
+ %rot_y = call i32 @llvm.fshl.i32(i32 %d, i32 %d, i32 3)
+ %x_add_y = add i32 %x, %rot_y
+ %r = and i32 %x_add_y, %rot_y
+ ret i32 %r
+}
+
+
+; Test that (X + Y) & Y --> ~X & Y when Y = rotr(bswap(c ? 0 : 2), 3) (pow2-or-zero).
+define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
+; CHECK-LABEL: pow2_rotr_orzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: notb %sil
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: addl %eax, %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: rorl $3, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 0, i32 2
+ %d = call i32 @llvm.bswap.i32(i32 %y)
+ %rot_y = call i32 @llvm.fshr.i32(i32 %d, i32 %d, i32 3)
+ %x_add_y = add i32 %x, %rot_y
+ %r = and i32 %x_add_y, %rot_y
+ ret i32 %r
+}
+
; Test that (X - Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_sub:
>From d1c651209cb561fdf1181e750de564126a81a8ff Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Sun, 1 Mar 2026 21:18:59 -0400
Subject: [PATCH 2/5] Added test cases that use demanded elements for bswap and
bitreverse
---
llvm/test/CodeGen/X86/known-pow2.ll | 80 +++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 33767a8eac4e1..5d4f763fd7c81 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -974,6 +974,33 @@ define i1 @pow2_bswap(i32 %x,i1 %c){
ret i1 %r
}
+define i1 @pow2_bswap_extractelt(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bswap_extractelt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: testb $1, %sil
+; CHECK-NEXT: jne .LBB45_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
+; CHECK-NEXT: jmp .LBB45_3
+; CHECK-NEXT: .LBB45_1:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
+; CHECK-NEXT: .LBB45_3:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: packuswb %xmm0, %xmm0
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
+ %y_bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %y_sel)
+ %y_elt = extractelement <2 x i32> %y_bswap, i32 0
+ %and = and i32 %x, %y_elt
+ %r = icmp eq i32 %and, %y_elt
+ ret i1 %r
+}
+
define i1 @pow2_bitreverse(i32 %x,i1 %c){
; CHECK-LABEL: pow2_bitreverse:
; CHECK: # %bb.0:
@@ -997,6 +1024,59 @@ define i1 @pow2_bitreverse(i32 %x,i1 %c){
ret i1 %r
}
+define i1 @pow2_bitreverse_extractelt(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bitreverse_extractelt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: testb $1, %sil
+; CHECK-NEXT: jne .LBB47_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
+; CHECK-NEXT: jmp .LBB47_3
+; CHECK-NEXT: .LBB47_1:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
+; CHECK-NEXT: .LBB47_3:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $4, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: psllw $4, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $2, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: psllw $2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $1, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: paddb %xmm0, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
+ %y_rev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %y_sel)
+ %y_elt = extractelement <2 x i32> %y_rev, i32 0
+ %and = and i32 %x, %y_elt
+ %r = icmp eq i32 %and, %y_elt
+ ret i1 %r
+}
+
; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_add_fail:
>From 0c80a77e1f75fa727d3d53c3b5f2dad38e5cbfae Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:17:18 -0400
Subject: [PATCH 3/5] Changed to urem pattern instead
---
llvm/test/CodeGen/X86/known-pow2.ll | 201 +++++++++++++---------------
1 file changed, 92 insertions(+), 109 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 5d4f763fd7c81..eb5ca43bd4bfc 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -957,124 +957,105 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
ret i1 %bool
}
-define i1 @pow2_bswap(i32 %x,i1 %c){
+define i32 @pow2_bswap(i32 %a0, i32 %a1) {
; CHECK-LABEL: pow2_bswap:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: andl $1, %esi
-; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 4(,%rdi,4), %eax
; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: retq
- %y = select i1 %c, i32 4, i32 2
- %d = call i32 @llvm.bswap.i32(i32 %y)
- %and = and i32 %x, %d
- %r = icmp eq i32 %and, %d
- ret i1 %r
+ %cmp = icmp sgt i32 0, %a0
+ %sel = select i1 %cmp, i32 4, i32 8
+ %swap = call i32 @llvm.bswap.i32(i32 %sel)
+ %res = urem i32 %a1, %swap
+ ret i32 %res
}
-define i1 @pow2_bswap_extractelt(i32 %x,i1 %c){
-; CHECK-LABEL: pow2_bswap_extractelt:
+define i32 @pow2_bswap_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
+; CHECK-LABEL: pow2_bswap_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: jne .LBB45_1
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
-; CHECK-NEXT: jmp .LBB45_3
-; CHECK-NEXT: .LBB45_1:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
-; CHECK-NEXT: .LBB45_3:
; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-NEXT: packuswb %xmm0, %xmm0
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rsi)
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
- %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
- %y_bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %y_sel)
- %y_elt = extractelement <2 x i32> %y_bswap, i32 0
- %and = and i32 %x, %y_elt
- %r = icmp eq i32 %and, %y_elt
- ret i1 %r
+ %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+ %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
+ store <4 x i32> %swap, ptr %p2
+ %elt = extractelement <4 x i32> %swap, i32 0
+ %res = urem i32 %a1, %elt
+ ret i32 %res
}
-define i1 @pow2_bitreverse(i32 %x,i1 %c){
+define i32 @pow2_bitreverse(i32 %a0, i32 %a1) {
; CHECK-LABEL: pow2_bitreverse:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: andl $1, %esi
-; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 4(,%rdi,4), %eax
; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: shll $6, %eax
-; CHECK-NEXT: leal (%rax,%rcx,4), %ecx
-; CHECK-NEXT: andl $268435456, %ecx # imm = 0x10000000
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: leal (%rax,%rcx,2), %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: retq
- %y = select i1 %c, i32 4, i32 2
- %d = call i32 @llvm.bitreverse.i32(i32 %y)
- %and = and i32 %x, %d
- %r = icmp eq i32 %and, %d
- ret i1 %r
+ %cmp = icmp sgt i32 0, %a0
+ %sel = select i1 %cmp, i32 4, i32 8
+ %swap = call i32 @llvm.bswap.i32(i32 %sel)
+ %rev = call i32 @llvm.bitreverse.i32(i32 %sel)
+ %res = urem i32 %a1, %swap
+ ret i32 %res
}
-define i1 @pow2_bitreverse_extractelt(i32 %x,i1 %c){
-; CHECK-LABEL: pow2_bitreverse_extractelt:
+define i32 @pow2_bitreverse_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
+; CHECK-LABEL: pow2_bitreverse_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: jne .LBB47_1
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
-; CHECK-NEXT: jmp .LBB47_3
-; CHECK-NEXT: .LBB47_1:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
-; CHECK-NEXT: .LBB47_3:
; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: movdqa %xmm0, %xmm2
-; CHECK-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; CHECK-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
-; CHECK-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
-; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-NEXT: packuswb %xmm2, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $4, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pand %xmm2, %xmm0
-; CHECK-NEXT: psllw $4, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $2, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pand %xmm2, %xmm0
-; CHECK-NEXT: psllw $2, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $1, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pand %xmm2, %xmm0
-; CHECK-NEXT: paddb %xmm0, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rsi)
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
- %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
- %y_rev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %y_sel)
- %y_elt = extractelement <2 x i32> %y_rev, i32 0
- %and = and i32 %x, %y_elt
- %r = icmp eq i32 %and, %y_elt
- ret i1 %r
+ %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+ %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
+ store <4 x i32> %swap, ptr %p2
+ %elt = extractelement <4 x i32> %swap, i32 0
+ %res = urem i32 %a1, %elt
+ ret i32 %res
}
; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
@@ -1115,14 +1096,15 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
; CHECK-LABEL: pow2_rotl_orzero:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: notb %sil
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: roll $3, %eax
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movzbl %sil, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: addl %ecx, %ecx
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: roll $3, %ecx
+; CHECK-NEXT: leal (%rdi,%rcx), %eax
+; CHECK-NEXT: andl %ecx, %eax
; CHECK-NEXT: retq
%y = select i1 %c, i32 0, i32 2
%d = call i32 @llvm.bswap.i32(i32 %y)
@@ -1137,14 +1119,15 @@ define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
; CHECK-LABEL: pow2_rotr_orzero:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: notb %sil
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: rorl $3, %eax
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movzbl %sil, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: addl %ecx, %ecx
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: rorl $3, %ecx
+; CHECK-NEXT: leal (%rdi,%rcx), %eax
+; CHECK-NEXT: andl %ecx, %eax
; CHECK-NEXT: retq
%y = select i1 %c, i32 0, i32 2
%d = call i32 @llvm.bswap.i32(i32 %y)
>From d83b79ba9930ad6079dadc6d548f4ec432e1c1b5 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:19:37 -0400
Subject: [PATCH 4/5] Dropped rot tests
---
llvm/test/CodeGen/X86/known-pow2.ll | 34 -----------------------------
1 file changed, 34 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index eb5ca43bd4bfc..82f722add64b5 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -197,40 +197,6 @@ define i1 @pow2_srl_fail1(i32 %x, i32 %y) {
ret i1 %r
}
-define i1 @pow2_rotl(i32 %x, i32 %y) {
-; CHECK-LABEL: pow2_rotl:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: movl $1048576, %eax # imm = 0x100000
-; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: roll %cl, %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
- %d = call i32 @llvm.fshl.i32(i32 1048576, i32 1048576, i32 %y)
- %and = and i32 %x, %d
- %r = icmp eq i32 %and, %d
- ret i1 %r
-}
-
-define i1 @pow2_rotl_fail0(i32 %x, i32 %y) {
-; CHECK-LABEL: pow2_rotl_fail0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %esi, %ecx
-; CHECK-NEXT: movl $1048576, %eax # imm = 0x100000
-; CHECK-NEXT: movl $512, %edx # imm = 0x200
-; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shldl %cl, %eax, %edx
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: testl %edi, %edx
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: retq
- %d = call i32 @llvm.fshl.i32(i32 512, i32 1048576, i32 %y)
- %and = and i32 %x, %d
- %r = icmp eq i32 %and, %d
- ret i1 %r
-}
-
define i1 @pow2_rotl_fail1(i32 %x, i32 %y) {
; CHECK-LABEL: pow2_rotl_fail1:
; CHECK: # %bb.0:
>From 6b5d997bd9bba7dcd129f6992362b6a5fe5a5c49 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:20:38 -0400
Subject: [PATCH 5/5] Focusing on BSwap and BitReverse only
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b315b233eb4d6..7102e28c8f40b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,6 +4753,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
case ISD::ROTL:
case ISD::ROTR:
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ Depth + 1);
case ISD::BSWAP:
case ISD::BITREVERSE:
return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
More information about the llvm-commits
mailing list