[llvm] [DAG] isKnownToBeAPowerOfTwo - Power of 2 value is known to be power of 2 after BSWAP/BITREVERSE (PR #182207)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 11:51:24 PST 2026
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/182207
>From b0697006456eeba909d127fe6ccd1f9848a6cbe6 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Fri, 27 Feb 2026 19:59:45 -0400
Subject: [PATCH 1/6] [DAG] isKnownToBeAPowerOfTwo function now handles BSwap
and BitReverse
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
llvm/test/CodeGen/X86/known-pow2.ll | 84 +++++++++++++++++++
2 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4a2bd811b5214..17330665c822f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4732,7 +4732,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
case ISD::ROTL:
case ISD::ROTR:
- return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ case ISD::BSWAP:
+ case ISD::BITREVERSE:
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
Depth + 1);
case ISD::SMIN:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 55d418b12f80f..2f5ef7dad1161 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -910,6 +910,46 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
ret i1 %bool
}
+define i1 @pow2_bswap(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bswap:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 4, i32 2
+ %d = call i32 @llvm.bswap.i32(i32 %y)
+ %and = and i32 %x, %d
+ %r = icmp eq i32 %and, %d
+ ret i1 %r
+}
+
+define i1 @pow2_bitreverse(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bitreverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shll $6, %eax
+; CHECK-NEXT: leal (%rax,%rcx,4), %ecx
+; CHECK-NEXT: andl $268435456, %ecx # imm = 0x10000000
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: leal (%rax,%rcx,2), %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 4, i32 2
+ %d = call i32 @llvm.bitreverse.i32(i32 %y)
+ %and = and i32 %x, %d
+ %r = icmp eq i32 %and, %d
+ ret i1 %r
+}
+
; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_add_fail:
@@ -943,6 +983,50 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
ret i32 %r
}
+
+; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
+define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
+; CHECK-LABEL: pow2_rotl_orzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: notb %sil
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: addl %eax, %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: roll $3, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 0, i32 2
+ %d = call i32 @llvm.bswap.i32(i32 %y)
+ %rot_y = call i32 @llvm.fshl.i32(i32 %d, i32 %d, i32 3)
+ %x_add_y = add i32 %x, %rot_y
+ %r = and i32 %x_add_y, %rot_y
+ ret i32 %r
+}
+
+
+; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
+define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
+; CHECK-LABEL: pow2_rotr_orzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: notb %sil
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: addl %eax, %eax
+; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: rorl $3, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+ %y = select i1 %c, i32 0, i32 2
+ %d = call i32 @llvm.bswap.i32(i32 %y)
+ %rot_y = call i32 @llvm.fshr.i32(i32 %d, i32 %d, i32 3)
+ %x_add_y = add i32 %x, %rot_y
+ %r = and i32 %x_add_y, %rot_y
+ ret i32 %r
+}
+
; Test that (X - Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_sub:
>From d1c651209cb561fdf1181e750de564126a81a8ff Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Sun, 1 Mar 2026 21:18:59 -0400
Subject: [PATCH 2/6] Added test cases that use demanded elements for bswap and
bitreverse
---
llvm/test/CodeGen/X86/known-pow2.ll | 80 +++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 33767a8eac4e1..5d4f763fd7c81 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -974,6 +974,33 @@ define i1 @pow2_bswap(i32 %x,i1 %c){
ret i1 %r
}
+define i1 @pow2_bswap_extractelt(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bswap_extractelt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: testb $1, %sil
+; CHECK-NEXT: jne .LBB45_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
+; CHECK-NEXT: jmp .LBB45_3
+; CHECK-NEXT: .LBB45_1:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
+; CHECK-NEXT: .LBB45_3:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: packuswb %xmm0, %xmm0
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
+ %y_bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %y_sel)
+ %y_elt = extractelement <2 x i32> %y_bswap, i32 0
+ %and = and i32 %x, %y_elt
+ %r = icmp eq i32 %and, %y_elt
+ ret i1 %r
+}
+
define i1 @pow2_bitreverse(i32 %x,i1 %c){
; CHECK-LABEL: pow2_bitreverse:
; CHECK: # %bb.0:
@@ -997,6 +1024,59 @@ define i1 @pow2_bitreverse(i32 %x,i1 %c){
ret i1 %r
}
+define i1 @pow2_bitreverse_extractelt(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bitreverse_extractelt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: testb $1, %sil
+; CHECK-NEXT: jne .LBB47_1
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
+; CHECK-NEXT: jmp .LBB47_3
+; CHECK-NEXT: .LBB47_1:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
+; CHECK-NEXT: .LBB47_3:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $4, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: psllw $4, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $2, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: psllw $2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $1, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: paddb %xmm0, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: testl %eax, %edi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
+ %y_rev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %y_sel)
+ %y_elt = extractelement <2 x i32> %y_rev, i32 0
+ %and = and i32 %x, %y_elt
+ %r = icmp eq i32 %and, %y_elt
+ ret i1 %r
+}
+
; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_add_fail:
>From 0c80a77e1f75fa727d3d53c3b5f2dad38e5cbfae Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:17:18 -0400
Subject: [PATCH 3/6] Changed to urem pattern instead
---
llvm/test/CodeGen/X86/known-pow2.ll | 201 +++++++++++++---------------
1 file changed, 92 insertions(+), 109 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 5d4f763fd7c81..eb5ca43bd4bfc 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -957,124 +957,105 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
ret i1 %bool
}
-define i1 @pow2_bswap(i32 %x,i1 %c){
+define i32 @pow2_bswap(i32 %a0, i32 %a1) {
; CHECK-LABEL: pow2_bswap:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: andl $1, %esi
-; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 4(,%rdi,4), %eax
; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: retq
- %y = select i1 %c, i32 4, i32 2
- %d = call i32 @llvm.bswap.i32(i32 %y)
- %and = and i32 %x, %d
- %r = icmp eq i32 %and, %d
- ret i1 %r
+ %cmp = icmp sgt i32 0, %a0
+ %sel = select i1 %cmp, i32 4, i32 8
+ %swap = call i32 @llvm.bswap.i32(i32 %sel)
+ %res = urem i32 %a1, %swap
+ ret i32 %res
}
-define i1 @pow2_bswap_extractelt(i32 %x,i1 %c){
-; CHECK-LABEL: pow2_bswap_extractelt:
+define i32 @pow2_bswap_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
+; CHECK-LABEL: pow2_bswap_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: jne .LBB45_1
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
-; CHECK-NEXT: jmp .LBB45_3
-; CHECK-NEXT: .LBB45_1:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
-; CHECK-NEXT: .LBB45_3:
; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-NEXT: packuswb %xmm0, %xmm0
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rsi)
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
- %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
- %y_bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %y_sel)
- %y_elt = extractelement <2 x i32> %y_bswap, i32 0
- %and = and i32 %x, %y_elt
- %r = icmp eq i32 %and, %y_elt
- ret i1 %r
+ %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+ %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
+ store <4 x i32> %swap, ptr %p2
+ %elt = extractelement <4 x i32> %swap, i32 0
+ %res = urem i32 %a1, %elt
+ ret i32 %res
}
-define i1 @pow2_bitreverse(i32 %x,i1 %c){
+define i32 @pow2_bitreverse(i32 %a0, i32 %a1) {
; CHECK-LABEL: pow2_bitreverse:
; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: andl $1, %esi
-; CHECK-NEXT: leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: shrl $31, %edi
+; CHECK-NEXT: leal 4(,%rdi,4), %eax
; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: shll $6, %eax
-; CHECK-NEXT: leal (%rax,%rcx,4), %ecx
-; CHECK-NEXT: andl $268435456, %ecx # imm = 0x10000000
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: leal (%rax,%rcx,2), %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: retq
- %y = select i1 %c, i32 4, i32 2
- %d = call i32 @llvm.bitreverse.i32(i32 %y)
- %and = and i32 %x, %d
- %r = icmp eq i32 %and, %d
- ret i1 %r
+ %cmp = icmp sgt i32 0, %a0
+ %sel = select i1 %cmp, i32 4, i32 8
+ %swap = call i32 @llvm.bswap.i32(i32 %sel)
+ %rev = call i32 @llvm.bitreverse.i32(i32 %sel)
+ %res = urem i32 %a1, %swap
+ ret i32 %res
}
-define i1 @pow2_bitreverse_extractelt(i32 %x,i1 %c){
-; CHECK-LABEL: pow2_bitreverse_extractelt:
+define i32 @pow2_bitreverse_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
+; CHECK-LABEL: pow2_bitreverse_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: testb $1, %sil
-; CHECK-NEXT: jne .LBB47_1
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [4,3,0,0]
-; CHECK-NEXT: jmp .LBB47_3
-; CHECK-NEXT: .LBB47_1:
-; CHECK-NEXT: movq {{.*#+}} xmm0 = [2,3,0,0]
-; CHECK-NEXT: .LBB47_3:
; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: movdqa %xmm0, %xmm2
-; CHECK-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; CHECK-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
-; CHECK-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
-; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-NEXT: packuswb %xmm2, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $4, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pand %xmm2, %xmm0
-; CHECK-NEXT: psllw $4, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $2, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pand %xmm2, %xmm0
-; CHECK-NEXT: psllw $2, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $1, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; CHECK-NEXT: pand %xmm2, %xmm1
-; CHECK-NEXT: pand %xmm2, %xmm0
-; CHECK-NEXT: paddb %xmm0, %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: testl %eax, %edi
-; CHECK-NEXT: setne %al
+; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rsi)
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
- %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
- %y_rev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %y_sel)
- %y_elt = extractelement <2 x i32> %y_rev, i32 0
- %and = and i32 %x, %y_elt
- %r = icmp eq i32 %and, %y_elt
- ret i1 %r
+ %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+ %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
+ store <4 x i32> %swap, ptr %p2
+ %elt = extractelement <4 x i32> %swap, i32 0
+ %res = urem i32 %a1, %elt
+ ret i32 %res
}
; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
@@ -1115,14 +1096,15 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
; CHECK-LABEL: pow2_rotl_orzero:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: notb %sil
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: roll $3, %eax
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movzbl %sil, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: addl %ecx, %ecx
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: roll $3, %ecx
+; CHECK-NEXT: leal (%rdi,%rcx), %eax
+; CHECK-NEXT: andl %ecx, %eax
; CHECK-NEXT: retq
%y = select i1 %c, i32 0, i32 2
%d = call i32 @llvm.bswap.i32(i32 %y)
@@ -1137,14 +1119,15 @@ define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
; CHECK-LABEL: pow2_rotr_orzero:
; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: notb %sil
-; CHECK-NEXT: movzbl %sil, %eax
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: rorl $3, %eax
-; CHECK-NEXT: notl %edi
-; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movzbl %sil, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: addl %ecx, %ecx
+; CHECK-NEXT: bswapl %ecx
+; CHECK-NEXT: rorl $3, %ecx
+; CHECK-NEXT: leal (%rdi,%rcx), %eax
+; CHECK-NEXT: andl %ecx, %eax
; CHECK-NEXT: retq
%y = select i1 %c, i32 0, i32 2
%d = call i32 @llvm.bswap.i32(i32 %y)
>From 0e0fa71edcb7d85e062753215116893df1849141 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:20:38 -0400
Subject: [PATCH 4/6] Focusing on BSwap and BitReverse only
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b315b233eb4d6..7102e28c8f40b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,6 +4753,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
case ISD::ROTL:
case ISD::ROTR:
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ Depth + 1);
case ISD::BSWAP:
case ISD::BITREVERSE:
return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
>From 45cc6caea7cddd6dbedbf93c06ba6cfb047f2162 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Tue, 3 Mar 2026 10:56:20 -0400
Subject: [PATCH 5/6] Removed rot tests that don't correspond to this pr
---
llvm/test/CodeGen/X86/known-pow2.ll | 46 -----------------------------
1 file changed, 46 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index eb5ca43bd4bfc..81b2a82cb49f2 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1091,52 +1091,6 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
ret i32 %r
}
-
-; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
-define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
-; CHECK-LABEL: pow2_rotl_orzero:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notb %sil
-; CHECK-NEXT: movzbl %sil, %ecx
-; CHECK-NEXT: andl $1, %ecx
-; CHECK-NEXT: addl %ecx, %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: roll $3, %ecx
-; CHECK-NEXT: leal (%rdi,%rcx), %eax
-; CHECK-NEXT: andl %ecx, %eax
-; CHECK-NEXT: retq
- %y = select i1 %c, i32 0, i32 2
- %d = call i32 @llvm.bswap.i32(i32 %y)
- %rot_y = call i32 @llvm.fshl.i32(i32 %d, i32 %d, i32 3)
- %x_add_y = add i32 %x, %rot_y
- %r = and i32 %x_add_y, %rot_y
- ret i32 %r
-}
-
-
-; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
-define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
-; CHECK-LABEL: pow2_rotr_orzero:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: notb %sil
-; CHECK-NEXT: movzbl %sil, %ecx
-; CHECK-NEXT: andl $1, %ecx
-; CHECK-NEXT: addl %ecx, %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: rorl $3, %ecx
-; CHECK-NEXT: leal (%rdi,%rcx), %eax
-; CHECK-NEXT: andl %ecx, %eax
-; CHECK-NEXT: retq
- %y = select i1 %c, i32 0, i32 2
- %d = call i32 @llvm.bswap.i32(i32 %y)
- %rot_y = call i32 @llvm.fshr.i32(i32 %d, i32 %d, i32 3)
- %x_add_y = add i32 %x, %rot_y
- %r = and i32 %x_add_y, %rot_y
- ret i32 %r
-}
-
; Test that (X - Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
; CHECK-LABEL: pow2_blsi_sub:
>From 4c9741db78f6bc7fe8da0ec4098cbb5947529013 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Wed, 4 Mar 2026 13:52:28 -0400
Subject: [PATCH 6/6] Fixed bitreverse tests
---
llvm/test/CodeGen/X86/known-pow2.ll | 46 ++++++++++++++++++++++-------
1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 81b2a82cb49f2..c49da0c8e8593 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1015,14 +1015,17 @@ define i32 @pow2_bitreverse(i32 %a0, i32 %a1) {
; CHECK-NEXT: shrl $31, %edi
; CHECK-NEXT: leal 4(,%rdi,4), %eax
; CHECK-NEXT: bswapl %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shll $3, %eax
+; CHECK-NEXT: leal (%rax,%rcx,2), %eax
+; CHECK-NEXT: andl $805306368, %eax # imm = 0x30000000
; CHECK-NEXT: decl %eax
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: retq
%cmp = icmp sgt i32 0, %a0
%sel = select i1 %cmp, i32 4, i32 8
- %swap = call i32 @llvm.bswap.i32(i32 %sel)
%rev = call i32 @llvm.bitreverse.i32(i32 %sel)
- %res = urem i32 %a1, %swap
+ %res = urem i32 %a1, %rev
ret i32 %res
}
@@ -1039,21 +1042,42 @@ define i32 @pow2_bitreverse_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm3 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; CHECK-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
-; CHECK-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; CHECK-NEXT: packuswb %xmm0, %xmm1
-; CHECK-NEXT: movdqa %xmm1, (%rsi)
-; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: packuswb %xmm3, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $4, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: psllw $4, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $2, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: psllw $2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlw $1, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: paddb %xmm0, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsi)
+; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: decl %eax
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%cmp = icmp sgt <4 x i32> zeroinitializer, %a0
%sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
- %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
- store <4 x i32> %swap, ptr %p2
- %elt = extractelement <4 x i32> %swap, i32 0
+ %rev = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %sel)
+ store <4 x i32> %rev, ptr %p2
+ %elt = extractelement <4 x i32> %rev, i32 0
%res = urem i32 %a1, %elt
ret i32 %res
}
More information about the llvm-commits
mailing list