[llvm] [SelectionDAG] Add computeKnownBits for ISD::ROTL/ROTR. (PR #156142)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 29 20:59:34 PDT 2025
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/156142
None
>From 81df98aaeb81ae74ed7f537f8abbdb73e42b33c8 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 29 Aug 2025 20:58:13 -0700
Subject: [PATCH] [SelectionDAG] Add computeKnownBits for ISD::ROTL/ROTR.
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 +++++++++++++++
...r-of-two-or-zero-when-comparing-with-zero.ll | 17 +++++++----------
llvm/test/CodeGen/X86/pr67333.ll | 10 +++++-----
3 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 967306ae37f45..3a3bcb77f5932 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3850,6 +3850,21 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = KnownBits::ashr(Known, Known2, /*ShAmtNonZero=*/false,
Op->getFlags().hasExact());
break;
+ case ISD::ROTL:
+ case ISD::ROTR:
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
+ unsigned Amt = C->getAPIntValue().urem(BitWidth);
+
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ // Canonicalize to ROTR.
+ if (Opcode == ISD::ROTL)
+ Amt = BitWidth - Amt;
+
+ Known.Zero = Known.Zero.rotr(Amt);
+ Known.One = Known.One.rotr(Amt);
+ }
+ break;
case ISD::FSHL:
case ISD::FSHR:
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 9e398096bfcc5..693d1992091b4 100644
--- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -93,10 +93,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: pslld $31, %xmm3
; SSE2-NEXT: por %xmm0, %xmm3
-; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [715827883,715827883,715827883,715827883]
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: p4_vector_urem_by_const__splat:
@@ -104,9 +102,9 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; SSE4-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-NEXT: psrld $1, %xmm0
-; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
-; SSE4-NEXT: pminud %xmm0, %xmm1
-; SSE4-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE4-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
+; SSE4-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE4-NEXT: movdqa %xmm1, %xmm0
; SSE4-NEXT: retq
;
; AVX2-LABEL: p4_vector_urem_by_const__splat:
@@ -116,9 +114,8 @@ define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827882,715827882,715827882,715827882]
-; AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [715827883,715827883,715827883,715827883]
+; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
%t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
%t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
diff --git a/llvm/test/CodeGen/X86/pr67333.ll b/llvm/test/CodeGen/X86/pr67333.ll
index 946380971988c..cbb730857506d 100644
--- a/llvm/test/CodeGen/X86/pr67333.ll
+++ b/llvm/test/CodeGen/X86/pr67333.ll
@@ -14,12 +14,12 @@ define void @SHA256_Compress_Generic(ptr noundef %ctx) #1 {
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vpsrld $17, %xmm2, %xmm0
; CHECK-NEXT: vpslld $15, %xmm2, %xmm3
-; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0
-; CHECK-NEXT: vpsrld $19, %xmm2, %xmm3
+; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm3
+; CHECK-NEXT: vpsrld $19, %xmm2, %xmm0
; CHECK-NEXT: vpslld $13, %xmm2, %xmm4
-; CHECK-NEXT: vpor %xmm3, %xmm4, %xmm3
-; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm3
-; CHECK-NEXT: vpxor %xmm2, %xmm3, %xmm0
+; CHECK-NEXT: vpor %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vpxor %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vmovd %ecx, %xmm4
; CHECK-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm1
More information about the llvm-commits
mailing list