[llvm] [DAG] isKnownToBeAPowerOfTwo - Power of 2 value is known to be power of 2 after BSWAP/BITREVERSE (PR #182207)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 4 11:51:24 PST 2026


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/182207

>From b0697006456eeba909d127fe6ccd1f9848a6cbe6 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Fri, 27 Feb 2026 19:59:45 -0400
Subject: [PATCH 1/6] [DAG] isKnownToBeAPowerOfTwo function now handles BSwap
 and BitReverse

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  4 +-
 llvm/test/CodeGen/X86/known-pow2.ll           | 84 +++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4a2bd811b5214..17330665c822f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4732,7 +4732,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
 
   case ISD::ROTL:
   case ISD::ROTR:
-    return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+  case ISD::BSWAP:
+  case ISD::BITREVERSE:
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
                                   Depth + 1);
 
   case ISD::SMIN:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 55d418b12f80f..2f5ef7dad1161 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -910,6 +910,46 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
   ret i1 %bool
 }
 
+define i1 @pow2_bswap(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bswap:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    testl %eax, %edi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %y = select i1 %c, i32 4, i32 2
+  %d = call i32 @llvm.bswap.i32(i32 %y)
+  %and = and i32 %x, %d
+  %r = icmp eq i32 %and, %d
+  ret i1 %r
+}
+
+define i1 @pow2_bitreverse(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bitreverse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    shll $6, %eax
+; CHECK-NEXT:    leal (%rax,%rcx,4), %ecx
+; CHECK-NEXT:    andl $268435456, %ecx # imm = 0x10000000
+; CHECK-NEXT:    shrl %eax
+; CHECK-NEXT:    leal (%rax,%rcx,2), %eax
+; CHECK-NEXT:    testl %eax, %edi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %y = select i1 %c, i32 4, i32 2
+  %d = call i32 @llvm.bitreverse.i32(i32 %y)
+  %and = and i32 %x, %d
+  %r = icmp eq i32 %and, %d
+  ret i1 %r
+}
+
 ; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
 define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
 ; CHECK-LABEL: pow2_blsi_add_fail:
@@ -943,6 +983,50 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
   ret i32 %r
 }
 
+
+; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
+define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
+; CHECK-LABEL: pow2_rotl_orzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notb %sil
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addl %eax, %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    roll $3, %eax
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
+  %y = select i1 %c, i32 0, i32 2
+  %d = call i32 @llvm.bswap.i32(i32 %y)
+  %rot_y = call i32 @llvm.fshl.i32(i32 %d, i32 %d, i32 3)
+  %x_add_y = add i32 %x, %rot_y
+  %r = and i32 %x_add_y, %rot_y
+  ret i32 %r
+}
+
+
+; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
+define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
+; CHECK-LABEL: pow2_rotr_orzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notb %sil
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    addl %eax, %eax
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    rorl $3, %eax
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
+  %y = select i1 %c, i32 0, i32 2
+  %d = call i32 @llvm.bswap.i32(i32 %y)
+  %rot_y = call i32 @llvm.fshr.i32(i32 %d, i32 %d, i32 3)
+  %x_add_y = add i32 %x, %rot_y
+  %r = and i32 %x_add_y, %rot_y
+  ret i32 %r
+}
+
 ; Test that (X - Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
 define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
 ; CHECK-LABEL: pow2_blsi_sub:

>From d1c651209cb561fdf1181e750de564126a81a8ff Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Sun, 1 Mar 2026 21:18:59 -0400
Subject: [PATCH 2/6] Added test cases that use demanded elements for bswap and
 bitreverse

---
 llvm/test/CodeGen/X86/known-pow2.ll | 80 +++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 33767a8eac4e1..5d4f763fd7c81 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -974,6 +974,33 @@ define i1 @pow2_bswap(i32 %x,i1 %c){
   ret i1 %r
 }
 
+define i1 @pow2_bswap_extractelt(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bswap_extractelt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    jne .LBB45_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = [4,3,0,0]
+; CHECK-NEXT:    jmp .LBB45_3
+; CHECK-NEXT:  .LBB45_1:
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = [2,3,0,0]
+; CHECK-NEXT:  .LBB45_3:
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    packuswb %xmm0, %xmm0
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    testl %eax, %edi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
+  %y_bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %y_sel)
+  %y_elt = extractelement <2 x i32> %y_bswap, i32 0
+  %and = and i32 %x, %y_elt
+  %r = icmp eq i32 %and, %y_elt
+  ret i1 %r
+}
+
 define i1 @pow2_bitreverse(i32 %x,i1 %c){
 ; CHECK-LABEL: pow2_bitreverse:
 ; CHECK:       # %bb.0:
@@ -997,6 +1024,59 @@ define i1 @pow2_bitreverse(i32 %x,i1 %c){
   ret i1 %r
 }
 
+define i1 @pow2_bitreverse_extractelt(i32 %x,i1 %c){
+; CHECK-LABEL: pow2_bitreverse_extractelt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    jne .LBB47_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = [4,3,0,0]
+; CHECK-NEXT:    jmp .LBB47_3
+; CHECK-NEXT:  .LBB47_1:
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = [2,3,0,0]
+; CHECK-NEXT:  .LBB47_3:
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    packuswb %xmm2, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlw $4, %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT:    pand %xmm2, %xmm1
+; CHECK-NEXT:    pand %xmm2, %xmm0
+; CHECK-NEXT:    psllw $4, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlw $2, %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; CHECK-NEXT:    pand %xmm2, %xmm1
+; CHECK-NEXT:    pand %xmm2, %xmm0
+; CHECK-NEXT:    psllw $2, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlw $1, %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; CHECK-NEXT:    pand %xmm2, %xmm1
+; CHECK-NEXT:    pand %xmm2, %xmm0
+; CHECK-NEXT:    paddb %xmm0, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    testl %eax, %edi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
+  %y_rev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %y_sel)
+  %y_elt = extractelement <2 x i32> %y_rev, i32 0
+  %and = and i32 %x, %y_elt
+  %r = icmp eq i32 %and, %y_elt
+  ret i1 %r
+}
+
 ; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
 define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
 ; CHECK-LABEL: pow2_blsi_add_fail:

>From 0c80a77e1f75fa727d3d53c3b5f2dad38e5cbfae Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:17:18 -0400
Subject: [PATCH 3/6] Changed to urem pattern instead

---
 llvm/test/CodeGen/X86/known-pow2.ll | 201 +++++++++++++---------------
 1 file changed, 92 insertions(+), 109 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 5d4f763fd7c81..eb5ca43bd4bfc 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -957,124 +957,105 @@ define i1 @pow2_and_i128(i128 %num, i128 %shift) {
   ret i1 %bool
 }
 
-define i1 @pow2_bswap(i32 %x,i1 %c){
+define i32 @pow2_bswap(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: pow2_bswap:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    shrl $31, %edi
+; CHECK-NEXT:    leal 4(,%rdi,4), %eax
 ; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    testl %eax, %edi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %esi, %eax
 ; CHECK-NEXT:    retq
-  %y = select i1 %c, i32 4, i32 2
-  %d = call i32 @llvm.bswap.i32(i32 %y)
-  %and = and i32 %x, %d
-  %r = icmp eq i32 %and, %d
-  ret i1 %r
+  %cmp = icmp sgt i32 0, %a0
+  %sel = select i1 %cmp, i32 4, i32 8
+  %swap = call i32 @llvm.bswap.i32(i32 %sel)
+  %res = urem i32 %a1, %swap
+  ret i32 %res
 }
 
-define i1 @pow2_bswap_extractelt(i32 %x,i1 %c){
-; CHECK-LABEL: pow2_bswap_extractelt:
+define i32 @pow2_bswap_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
+; CHECK-LABEL: pow2_bswap_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    jne .LBB45_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = [4,3,0,0]
-; CHECK-NEXT:    jmp .LBB45_3
-; CHECK-NEXT:  .LBB45_1:
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = [2,3,0,0]
-; CHECK-NEXT:  .LBB45_3:
 ; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT:    por %xmm0, %xmm2
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 ; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-NEXT:    packuswb %xmm0, %xmm0
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    testl %eax, %edi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    packuswb %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, (%rsi)
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
-  %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
-  %y_bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %y_sel)
-  %y_elt = extractelement <2 x i32> %y_bswap, i32 0
-  %and = and i32 %x, %y_elt
-  %r = icmp eq i32 %and, %y_elt
-  ret i1 %r
+  %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+  %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
+  %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
+  store <4 x i32> %swap, ptr %p2
+  %elt = extractelement <4 x i32> %swap, i32 0
+  %res = urem i32 %a1, %elt
+  ret i32 %res
 }
 
-define i1 @pow2_bitreverse(i32 %x,i1 %c){
+define i32 @pow2_bitreverse(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: pow2_bitreverse:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    andl $1, %esi
-; CHECK-NEXT:    leal 2(%rsi,%rsi), %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    shrl $31, %edi
+; CHECK-NEXT:    leal 4(,%rdi,4), %eax
 ; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    shll $6, %eax
-; CHECK-NEXT:    leal (%rax,%rcx,4), %ecx
-; CHECK-NEXT:    andl $268435456, %ecx # imm = 0x10000000
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    leal (%rax,%rcx,2), %eax
-; CHECK-NEXT:    testl %eax, %edi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %esi, %eax
 ; CHECK-NEXT:    retq
-  %y = select i1 %c, i32 4, i32 2
-  %d = call i32 @llvm.bitreverse.i32(i32 %y)
-  %and = and i32 %x, %d
-  %r = icmp eq i32 %and, %d
-  ret i1 %r
+  %cmp = icmp sgt i32 0, %a0
+  %sel = select i1 %cmp, i32 4, i32 8
+  %swap = call i32 @llvm.bswap.i32(i32 %sel)
+  %rev = call i32 @llvm.bitreverse.i32(i32 %sel)
+  %res = urem i32 %a1, %swap
+  ret i32 %res
 }
 
-define i1 @pow2_bitreverse_extractelt(i32 %x,i1 %c){
-; CHECK-LABEL: pow2_bitreverse_extractelt:
+define i32 @pow2_bitreverse_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
+; CHECK-LABEL: pow2_bitreverse_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb $1, %sil
-; CHECK-NEXT:    jne .LBB47_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = [4,3,0,0]
-; CHECK-NEXT:    jmp .LBB47_3
-; CHECK-NEXT:  .LBB47_1:
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = [2,3,0,0]
-; CHECK-NEXT:  .LBB47_3:
 ; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
-; CHECK-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
-; CHECK-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT:    por %xmm0, %xmm2
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 ; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
 ; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-NEXT:    packuswb %xmm2, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrlw $4, %xmm1
-; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; CHECK-NEXT:    pand %xmm2, %xmm1
-; CHECK-NEXT:    pand %xmm2, %xmm0
-; CHECK-NEXT:    psllw $4, %xmm0
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrlw $2, %xmm1
-; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; CHECK-NEXT:    pand %xmm2, %xmm1
-; CHECK-NEXT:    pand %xmm2, %xmm0
-; CHECK-NEXT:    psllw $2, %xmm0
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm1
-; CHECK-NEXT:    psrlw $1, %xmm1
-; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; CHECK-NEXT:    pand %xmm2, %xmm1
-; CHECK-NEXT:    pand %xmm2, %xmm0
-; CHECK-NEXT:    paddb %xmm0, %xmm0
-; CHECK-NEXT:    por %xmm1, %xmm0
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    testl %eax, %edi
-; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    packuswb %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, (%rsi)
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
-  %y_sel = select i1 %c, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 4, i32 3>
-  %y_rev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %y_sel)
-  %y_elt = extractelement <2 x i32> %y_rev, i32 0
-  %and = and i32 %x, %y_elt
-  %r = icmp eq i32 %and, %y_elt
-  ret i1 %r
+  %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+  %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
+  %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
+  store <4 x i32> %swap, ptr %p2
+  %elt = extractelement <4 x i32> %swap, i32 0
+  %res = urem i32 %a1, %elt
+  ret i32 %res
 }
 
 ; Negative test: Y = a | 1 is always odd/non-zero but not pow2, fold should not trigger.
@@ -1115,14 +1096,15 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
 define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
 ; CHECK-LABEL: pow2_rotl_orzero:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    notb %sil
-; CHECK-NEXT:    movzbl %sil, %eax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    addl %eax, %eax
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    roll $3, %eax
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    movzbl %sil, %ecx
+; CHECK-NEXT:    andl $1, %ecx
+; CHECK-NEXT:    addl %ecx, %ecx
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    roll $3, %ecx
+; CHECK-NEXT:    leal (%rdi,%rcx), %eax
+; CHECK-NEXT:    andl %ecx, %eax
 ; CHECK-NEXT:    retq
   %y = select i1 %c, i32 0, i32 2
   %d = call i32 @llvm.bswap.i32(i32 %y)
@@ -1137,14 +1119,15 @@ define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
 define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
 ; CHECK-LABEL: pow2_rotr_orzero:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    notb %sil
-; CHECK-NEXT:    movzbl %sil, %eax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    addl %eax, %eax
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    rorl $3, %eax
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    movzbl %sil, %ecx
+; CHECK-NEXT:    andl $1, %ecx
+; CHECK-NEXT:    addl %ecx, %ecx
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    rorl $3, %ecx
+; CHECK-NEXT:    leal (%rdi,%rcx), %eax
+; CHECK-NEXT:    andl %ecx, %eax
 ; CHECK-NEXT:    retq
   %y = select i1 %c, i32 0, i32 2
   %d = call i32 @llvm.bswap.i32(i32 %y)

>From 0e0fa71edcb7d85e062753215116893df1849141 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Mon, 2 Mar 2026 10:20:38 -0400
Subject: [PATCH 4/6] Focusing on BSwap and BitReverse only

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b315b233eb4d6..7102e28c8f40b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,6 +4753,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
 
   case ISD::ROTL:
   case ISD::ROTR:
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+                                  Depth + 1);
   case ISD::BSWAP:
   case ISD::BITREVERSE:
     return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,

>From 45cc6caea7cddd6dbedbf93c06ba6cfb047f2162 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Tue, 3 Mar 2026 10:56:20 -0400
Subject: [PATCH 5/6] Removed rot tests that don't belong to this PR

---
 llvm/test/CodeGen/X86/known-pow2.ll | 46 -----------------------------
 1 file changed, 46 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index eb5ca43bd4bfc..81b2a82cb49f2 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1091,52 +1091,6 @@ define i32 @pow2_blsi_add(i32 %x, i32 %a) {
   ret i32 %r
 }
 
-
-; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
-define i32 @pow2_rotl_orzero(i32 %x, i1 %c) {
-; CHECK-LABEL: pow2_rotl_orzero:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    notb %sil
-; CHECK-NEXT:    movzbl %sil, %ecx
-; CHECK-NEXT:    andl $1, %ecx
-; CHECK-NEXT:    addl %ecx, %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    roll $3, %ecx
-; CHECK-NEXT:    leal (%rdi,%rcx), %eax
-; CHECK-NEXT:    andl %ecx, %eax
-; CHECK-NEXT:    retq
-  %y = select i1 %c, i32 0, i32 2
-  %d = call i32 @llvm.bswap.i32(i32 %y)
-  %rot_y = call i32 @llvm.fshl.i32(i32 %d, i32 %d, i32 3)
-  %x_add_y = add i32 %x, %rot_y
-  %r = and i32 %x_add_y, %rot_y
-  ret i32 %r
-}
-
-
-; Test that (X + Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
-define i32 @pow2_rotr_orzero(i32 %x, i1 %c) {
-; CHECK-LABEL: pow2_rotr_orzero:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    notb %sil
-; CHECK-NEXT:    movzbl %sil, %ecx
-; CHECK-NEXT:    andl $1, %ecx
-; CHECK-NEXT:    addl %ecx, %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    rorl $3, %ecx
-; CHECK-NEXT:    leal (%rdi,%rcx), %eax
-; CHECK-NEXT:    andl %ecx, %eax
-; CHECK-NEXT:    retq
-  %y = select i1 %c, i32 0, i32 2
-  %d = call i32 @llvm.bswap.i32(i32 %y)
-  %rot_y = call i32 @llvm.fshr.i32(i32 %d, i32 %d, i32 3)
-  %x_add_y = add i32 %x, %rot_y
-  %r = and i32 %x_add_y, %rot_y
-  ret i32 %r
-}
-
 ; Test that (X - Y) & Y --> ~X & Y when Y = a & -a (pow2-or-zero).
 define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
 ; CHECK-LABEL: pow2_blsi_sub:

>From 4c9741db78f6bc7fe8da0ec4098cbb5947529013 Mon Sep 17 00:00:00 2001
From: Manuel Fernando Dun Jimenez <manueldun at gmail.com>
Date: Wed, 4 Mar 2026 13:52:28 -0400
Subject: [PATCH 6/6] Fixed bitreverse tests

---
 llvm/test/CodeGen/X86/known-pow2.ll | 46 ++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 81b2a82cb49f2..c49da0c8e8593 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1015,14 +1015,17 @@ define i32 @pow2_bitreverse(i32 %a0, i32 %a1) {
 ; CHECK-NEXT:    shrl $31, %edi
 ; CHECK-NEXT:    leal 4(,%rdi,4), %eax
 ; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    shll $3, %eax
+; CHECK-NEXT:    leal (%rax,%rcx,2), %eax
+; CHECK-NEXT:    andl $805306368, %eax # imm = 0x30000000
 ; CHECK-NEXT:    decl %eax
 ; CHECK-NEXT:    andl %esi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt i32 0, %a0
   %sel = select i1 %cmp, i32 4, i32 8
-  %swap = call i32 @llvm.bswap.i32(i32 %sel)
   %rev = call i32 @llvm.bitreverse.i32(i32 %sel)
-  %res = urem i32 %a1, %swap
+  %res = urem i32 %a1, %rev
   ret i32 %res
 }
 
@@ -1039,21 +1042,42 @@ define i32 @pow2_bitreverse_vec(<4 x i32> %a0, i32 %a1, ptr %p2) {
 ; CHECK-NEXT:    movdqa %xmm2, %xmm0
 ; CHECK-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 ; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm3 = xmm0[0,1,2,3,7,6,5,4]
 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; CHECK-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[3,2,1,0,4,5,6,7]
-; CHECK-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; CHECK-NEXT:    packuswb %xmm0, %xmm1
-; CHECK-NEXT:    movdqa %xmm1, (%rsi)
-; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT:    packuswb %xmm3, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlw $4, %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT:    pand %xmm2, %xmm1
+; CHECK-NEXT:    pand %xmm2, %xmm0
+; CHECK-NEXT:    psllw $4, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlw $2, %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; CHECK-NEXT:    pand %xmm2, %xmm1
+; CHECK-NEXT:    pand %xmm2, %xmm0
+; CHECK-NEXT:    psllw $2, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlw $1, %xmm1
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; CHECK-NEXT:    pand %xmm2, %xmm1
+; CHECK-NEXT:    pand %xmm2, %xmm0
+; CHECK-NEXT:    paddb %xmm0, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, (%rsi)
+; CHECK-NEXT:    movd %xmm0, %eax
 ; CHECK-NEXT:    decl %eax
 ; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
   %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
-  %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %sel)
-  store <4 x i32> %swap, ptr %p2
-  %elt = extractelement <4 x i32> %swap, i32 0
+  %rev = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %sel)
+  store <4 x i32> %rev, ptr %p2
+  %elt = extractelement <4 x i32> %rev, i32 0
   %res = urem i32 %a1, %elt
   ret i32 %res
 }



More information about the llvm-commits mailing list