[llvm] b0b5834 - [DAG] Improved handling of ISD::ROTL and ISD::ROTR in isKnownToBeAPowerOfTwo (#182744)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 08:43:28 PST 2026
Author: Mir Immad
Date: 2026-03-04T16:43:22Z
New Revision: b0b583475a03bbdb34dff0f3783d7c28713de582
URL: https://github.com/llvm/llvm-project/commit/b0b583475a03bbdb34dff0f3783d7c28713de582
DIFF: https://github.com/llvm/llvm-project/commit/b0b583475a03bbdb34dff0f3783d7c28713de582.diff
LOG: [DAG] Improved handling of ISD::ROTL and ISD::ROTR in isKnownToBeAPowerOfTwo (#182744)
Fixes #181642
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/known-pow2.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6e37cd5804424..a249d12291646 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,7 +4753,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
case ISD::ROTL:
case ISD::ROTR:
- return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
+ return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
Depth + 1);
case ISD::SMIN:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 2457f3344592c..940bb62118abd 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1006,3 +1006,83 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
%r = and i32 %x_sub_y, %y
ret i32 %r
}
+
+define i32 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr %p) {
+; CHECK-LABEL: pow2_rotl_extract_vec:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pslld $23, %xmm1
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: cvttps2dq %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: pmuludq %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rsi)
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+
+ %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+ %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
+ %d = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %powvec, <4 x i32> %powvec, <4 x i32> %rotamt)
+ store <4 x i32> %d, ptr %p
+ %elt = extractelement <4 x i32> %d, i32 0
+ %res = urem i32 %x, %elt
+ ret i32 %res
+}
+
+
+define i32 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr %p) {
+; CHECK-LABEL: pow2_rotr_extract_vec:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm3, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm3, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: por %xmm0, %xmm3
+; CHECK-NEXT: psubd %xmm1, %xmm2
+; CHECK-NEXT: pslld $23, %xmm2
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: cvttps2dq %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; CHECK-NEXT: pmuludq %xmm0, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: pmuludq %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: por %xmm2, %xmm1
+; CHECK-NEXT: movdqa %xmm1, (%rsi)
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+ %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+ %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
+ %d = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %powvec, <4 x i32> %powvec, <4 x i32> %rotamt)
+ store <4 x i32> %d, ptr %p
+ %elt = extractelement <4 x i32> %d, i32 0
+ %res = urem i32 %x, %elt
+ ret i32 %res
+}
More information about the llvm-commits mailing list