[llvm] [DAG] Improved handling of ISD::ROTL and ISD::ROTR in isKnownToBeAPowerOfTwo (PR #182744)

Wed Mar 4 04:33:30 PST 2026

https://github.com/mirimmad updated https://github.com/llvm/llvm-project/pull/182744

>From 15e6b6014648ab68faa0a7cfd46a406bd98c1e0a Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Sun, 22 Feb 2026 19:11:03 +0530
Subject: [PATCH 01/11] use 'OrZero' in 'isKnownToBeAPowerOfTwo'

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 084700c50bd06..9f3bd9f15d1d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,8 +4753,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
 
   case ISD::ROTL:
   case ISD::ROTR:
-    return isKnownToBeAPowerOfTwo(Val.getOperand(0), /*OrZero=*/false,
-                                  Depth + 1);
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), false, Depth + 1);
 
   case ISD::SMIN:
   case ISD::SMAX:

>From 1bd6086c369f613b1c79551e365c41ef5dc101fd Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Sun, 22 Feb 2026 19:26:42 +0530
Subject: [PATCH 02/11] Add a test for ISD::ROTL and ISD::ROTR

---
 .../AArch64/AArch64SelectionDAGTest.cpp       | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index b0c48e8c97995..36f266a8bdff3 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -1172,6 +1172,55 @@ TEST_F(AArch64SelectionDAGTest,
   EXPECT_EQ(SplatIdx, 0);
 }
 
+TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTL) {
+  SDLoc Loc;
+  auto IntVT = EVT::getIntegerVT(Context, 32);
+
+  // Power-of-two values
+  auto Pow2_4 = DAG->getConstant(4, Loc, IntVT);
+  auto Pow2_8 = DAG->getConstant(8, Loc, IntVT);
+
+  // Non-power-of-two values
+  auto NonPow2_5 = DAG->getConstant(5, Loc, IntVT);
+  auto NonPow2_0 = DAG->getConstant(0, Loc, IntVT);
+
+  auto RotAmount = DAG->getConstant(3, Loc, IntVT);
+
+  auto RotlPow2_4 = DAG->getNode(ISD::ROTL, Loc, IntVT, Pow2_4, RotAmount);
+  auto RotlPow2_8 = DAG->getNode(ISD::ROTL, Loc, IntVT, Pow2_8, RotAmount);
+  auto RotlNonPow2_5 =
+      DAG->getNode(ISD::ROTL, Loc, IntVT, NonPow2_5, RotAmount);
+  auto RotlZero = DAG->getNode(ISD::ROTL, Loc, IntVT, NonPow2_0, RotAmount);
+
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2_4));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2_8));
+
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlNonPow2_5));
+
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlZero));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlZero, /*OrZero=*/true));
+}
+
+TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
+  SDLoc Loc;
+  auto IntVT = EVT::getIntegerVT(Context, 32);
+
+  auto Pow2_16 = DAG->getConstant(16, Loc, IntVT);
+  auto NonPow2_6 = DAG->getConstant(6, Loc, IntVT);
+  auto Zero = DAG->getConstant(0, Loc, IntVT);
+
+  auto RotAmount = DAG->getConstant(5, Loc, IntVT);
+
+  auto RotrPow2 = DAG->getNode(ISD::ROTR, Loc, IntVT, Pow2_16, RotAmount);
+  auto RotrNonPow2 = DAG->getNode(ISD::ROTR, Loc, IntVT, NonPow2_6, RotAmount);
+  auto RotrZero = DAG->getNode(ISD::ROTR, Loc, IntVT, Zero, RotAmount);
+
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrPow2));
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrNonPow2));
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrZero));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrZero, /*OrZero=*/true));
+}
+
 TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) {
   TargetLowering TL(*TM, *STI);
 

>From f105d54a20e673420cb9bccc23f44b74929fec59 Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Sun, 22 Feb 2026 23:07:34 +0530
Subject: [PATCH 03/11] Add missing 'DemandedElts' and 'OrZero' arguments to
 'isKnownToBeAPowerOfTwo' and associated tests

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  2 +-
 .../AArch64/AArch64SelectionDAGTest.cpp       | 21 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9f3bd9f15d1d7..de2ee2ee59622 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,7 +4753,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
 
   case ISD::ROTL:
   case ISD::ROTR:
-    return isKnownToBeAPowerOfTwo(Val.getOperand(0), false, Depth + 1);
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero, Depth + 1);
 
   case ISD::SMIN:
   case ISD::SMAX:
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index 36f266a8bdff3..e5f61576f6ffd 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -1199,6 +1199,17 @@ TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTL) {
 
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlZero));
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlZero, /*OrZero=*/true));
+
+    // Also verify DemandedElts is forwarded through ROTL for vector lanes.
+    auto VecVT = EVT::getVectorVT(Context, IntVT, 2, /*IsScalable=*/false);
+    auto PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_4, NonPow2_0});
+    auto RotAmountVec = DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
+    auto RotlPowAndZero =
+      DAG->getNode(ISD::ROTL, Loc, VecVT, PowAndZero, RotAmountVec);
+    APInt DemandAll(2, 3);
+    EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll));
+    EXPECT_TRUE(
+      DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll, /*OrZero=*/true));
 }
 
 TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
@@ -1219,6 +1230,16 @@ TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrNonPow2));
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrZero));
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrZero, /*OrZero=*/true));
+
+    auto VecVT = EVT::getVectorVT(Context, IntVT, 2, /*IsScalable=*/false);
+    auto PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_16, Zero});
+    auto RotAmountVec = DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
+    auto RotrPowAndZero =
+      DAG->getNode(ISD::ROTR, Loc, VecVT, PowAndZero, RotAmountVec);
+    APInt DemandAll(2, 3);
+    EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll));
+    EXPECT_TRUE(
+      DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll, /*OrZero=*/true));
 }
 
 TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) {

>From 092f13453ff7c8bafc9d723d16335140e6e67b84 Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Tue, 24 Feb 2026 16:48:59 +0530
Subject: [PATCH 04/11] Remove 'auto' and add a test for vector case

---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  3 +-
 llvm/test/CodeGen/X86/known-pow2.ll           | 31 ++++++++
 .../AArch64/AArch64SelectionDAGTest.cpp       | 73 ++++++++++---------
 3 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index de2ee2ee59622..bb2a8accc4f24 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4753,7 +4753,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val,
 
   case ISD::ROTL:
   case ISD::ROTR:
-    return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero, Depth + 1);
+    return isKnownToBeAPowerOfTwo(Val.getOperand(0), DemandedElts, OrZero,
+                                  Depth + 1);
 
   case ISD::SMIN:
   case ISD::SMAX:
diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 2457f3344592c..d5c6ac773d496 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -11,6 +11,8 @@ declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
 
 define <4 x i32> @pow2_non_splat_vec(<4 x i32> %x) {
 ; CHECK-LABEL: pow2_non_splat_vec:
@@ -1006,3 +1008,32 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
   %r = and i32 %x_sub_y, %y
   ret i32 %r
 }
+
+define <2 x i32> @pow2_rotl_vec() {
+; CHECK-LABEL: pow2_rotl_vec:
+; CHECK:       # %bb.0:
+; CHECK:       xmm0 = [32,0,0,0]
+; CHECK:       retq
+entry:
+  ; build vector <4,0>
+  %v0 = insertelement <2 x i32> undef, i32 4, i32 0
+  %v1 = insertelement <2 x i32> %v0, i32 0, i32 1
+  %amt0 = insertelement <2 x i32> undef, i32 3, i32 0
+  %amt1 = insertelement <2 x i32> %amt0, i32 3, i32 1
+  %r = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %v1, <2 x i32> %v1, <2 x i32> %amt1)
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @pow2_rotr_vec() {
+; CHECK-LABEL: pow2_rotr_vec:
+; CHECK:       # %bb.0:
+; CHECK:       xmm0 = [2147483648,0,0,0]
+; CHECK:       retq
+entry:
+  %v0 = insertelement <2 x i32> undef, i32 16, i32 0
+  %v1 = insertelement <2 x i32> %v0, i32 0, i32 1
+  %amt0 = insertelement <2 x i32> undef, i32 5, i32 0
+  %amt1 = insertelement <2 x i32> %amt0, i32 5, i32 1
+  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %v1, <2 x i32> %v1, <2 x i32> %amt1)
+  ret <2 x i32> %r
+}
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index e5f61576f6ffd..d599cf14adc07 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -1174,23 +1174,24 @@ TEST_F(AArch64SelectionDAGTest,
 
 TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTL) {
   SDLoc Loc;
-  auto IntVT = EVT::getIntegerVT(Context, 32);
-
   // Power-of-two values
-  auto Pow2_4 = DAG->getConstant(4, Loc, IntVT);
-  auto Pow2_8 = DAG->getConstant(8, Loc, IntVT);
+  SDValue Pow2_4 = DAG->getConstant(4, Loc, MVT::i32);
+  SDValue Pow2_8 = DAG->getConstant(8, Loc, MVT::i32);
 
   // Non-power-of-two values
-  auto NonPow2_5 = DAG->getConstant(5, Loc, IntVT);
-  auto NonPow2_0 = DAG->getConstant(0, Loc, IntVT);
+  SDValue NonPow2_5 = DAG->getConstant(5, Loc, MVT::i32);
+  SDValue NonPow2_0 = DAG->getConstant(0, Loc, MVT::i32);
 
-  auto RotAmount = DAG->getConstant(3, Loc, IntVT);
+  SDValue RotAmount = DAG->getConstant(3, Loc, MVT::i32);
 
-  auto RotlPow2_4 = DAG->getNode(ISD::ROTL, Loc, IntVT, Pow2_4, RotAmount);
-  auto RotlPow2_8 = DAG->getNode(ISD::ROTL, Loc, IntVT, Pow2_8, RotAmount);
-  auto RotlNonPow2_5 =
-      DAG->getNode(ISD::ROTL, Loc, IntVT, NonPow2_5, RotAmount);
-  auto RotlZero = DAG->getNode(ISD::ROTL, Loc, IntVT, NonPow2_0, RotAmount);
+  SDValue RotlPow2_4 =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Pow2_4, RotAmount);
+  SDValue RotlPow2_8 =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Pow2_8, RotAmount);
+  SDValue RotlNonPow2_5 =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, NonPow2_5, RotAmount);
+  SDValue RotlZero =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, NonPow2_0, RotAmount);
 
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2_4));
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2_8));
@@ -1200,45 +1201,47 @@ TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTL) {
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlZero));
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlZero, /*OrZero=*/true));
 
-    // Also verify DemandedElts is forwarded through ROTL for vector lanes.
-    auto VecVT = EVT::getVectorVT(Context, IntVT, 2, /*IsScalable=*/false);
-    auto PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_4, NonPow2_0});
-    auto RotAmountVec = DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
-    auto RotlPowAndZero =
+  // Also verify DemandedElts is forwarded through ROTL for vector lanes.
+  MVT::SimpleValueType VecVT = MVT::v2i32;
+  SDValue PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_4, NonPow2_0});
+  SDValue RotAmountVec =
+      DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
+  SDValue RotlPowAndZero =
       DAG->getNode(ISD::ROTL, Loc, VecVT, PowAndZero, RotAmountVec);
-    APInt DemandAll(2, 3);
-    EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll));
-    EXPECT_TRUE(
+  APInt DemandAll(2, 3);
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll));
+  EXPECT_TRUE(
       DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll, /*OrZero=*/true));
 }
 
 TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
   SDLoc Loc;
-  auto IntVT = EVT::getIntegerVT(Context, 32);
 
-  auto Pow2_16 = DAG->getConstant(16, Loc, IntVT);
-  auto NonPow2_6 = DAG->getConstant(6, Loc, IntVT);
-  auto Zero = DAG->getConstant(0, Loc, IntVT);
+  SDValue Pow2_16 = DAG->getConstant(16, Loc, MVT::i32);
+  SDValue NonPow2_6 = DAG->getConstant(6, Loc, MVT::i32);
+  SDValue Zero = DAG->getConstant(0, Loc, MVT::i32);
 
-  auto RotAmount = DAG->getConstant(5, Loc, IntVT);
+  SDValue RotAmount = DAG->getConstant(5, Loc, MVT::i32);
 
-  auto RotrPow2 = DAG->getNode(ISD::ROTR, Loc, IntVT, Pow2_16, RotAmount);
-  auto RotrNonPow2 = DAG->getNode(ISD::ROTR, Loc, IntVT, NonPow2_6, RotAmount);
-  auto RotrZero = DAG->getNode(ISD::ROTR, Loc, IntVT, Zero, RotAmount);
+  SDValue RotrPow2 = DAG->getNode(ISD::ROTR, Loc, MVT::i32, Pow2_16, RotAmount);
+  SDValue RotrNonPow2 =
+      DAG->getNode(ISD::ROTR, Loc, MVT::i32, NonPow2_6, RotAmount);
+  SDValue RotrZero = DAG->getNode(ISD::ROTR, Loc, MVT::i32, Zero, RotAmount);
 
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrPow2));
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrNonPow2));
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrZero));
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrZero, /*OrZero=*/true));
 
-    auto VecVT = EVT::getVectorVT(Context, IntVT, 2, /*IsScalable=*/false);
-    auto PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_16, Zero});
-    auto RotAmountVec = DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
-    auto RotrPowAndZero =
+  MVT::SimpleValueType VecVT = MVT::v2i32;
+  SDValue PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_16, Zero});
+  SDValue RotAmountVec =
+      DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
+  SDValue RotrPowAndZero =
       DAG->getNode(ISD::ROTR, Loc, VecVT, PowAndZero, RotAmountVec);
-    APInt DemandAll(2, 3);
-    EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll));
-    EXPECT_TRUE(
+  APInt DemandAll(2, 3);
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll));
+  EXPECT_TRUE(
       DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll, /*OrZero=*/true));
 }
 

>From f3a380ff71a5730d138ce1aaec6968935c1638f3 Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Wed, 25 Feb 2026 16:04:34 +0530
Subject: [PATCH 05/11] Use 'SELECT' when creating ROTL/ROTR nodes; use
 zeroinitializer instead of undef.

---
 llvm/test/CodeGen/X86/known-pow2.ll           |  8 +-
 .../AArch64/AArch64SelectionDAGTest.cpp       | 91 ++++++++++++++-----
 2 files changed, 70 insertions(+), 29 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index d5c6ac773d496..91af45a3f357c 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1016,9 +1016,9 @@ define <2 x i32> @pow2_rotl_vec() {
 ; CHECK:       retq
 entry:
   ; build vector <4,0>
-  %v0 = insertelement <2 x i32> undef, i32 4, i32 0
+  %v0 = insertelement <2 x i32> zeroinitializer, i32 4, i32 0
   %v1 = insertelement <2 x i32> %v0, i32 0, i32 1
-  %amt0 = insertelement <2 x i32> undef, i32 3, i32 0
+  %amt0 = insertelement <2 x i32> zeroinitializer, i32 3, i32 0
   %amt1 = insertelement <2 x i32> %amt0, i32 3, i32 1
   %r = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %v1, <2 x i32> %v1, <2 x i32> %amt1)
   ret <2 x i32> %r
@@ -1030,9 +1030,9 @@ define <2 x i32> @pow2_rotr_vec() {
 ; CHECK:       xmm0 = [2147483648,0,0,0]
 ; CHECK:       retq
 entry:
-  %v0 = insertelement <2 x i32> undef, i32 16, i32 0
+  %v0 = insertelement <2 x i32> zeroinitializer, i32 16, i32 0
   %v1 = insertelement <2 x i32> %v0, i32 0, i32 1
-  %amt0 = insertelement <2 x i32> undef, i32 5, i32 0
+  %amt0 = insertelement <2 x i32> zeroinitializer, i32 5, i32 0
   %amt1 = insertelement <2 x i32> %amt0, i32 5, i32 1
   %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %v1, <2 x i32> %v1, <2 x i32> %amt1)
   ret <2 x i32> %r
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index d599cf14adc07..4addfe97d98c1 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -1177,55 +1177,83 @@ TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTL) {
   // Power-of-two values
   SDValue Pow2_4 = DAG->getConstant(4, Loc, MVT::i32);
   SDValue Pow2_8 = DAG->getConstant(8, Loc, MVT::i32);
-
   // Non-power-of-two values
-  SDValue NonPow2_5 = DAG->getConstant(5, Loc, MVT::i32);
+  SDValue NonPow2_3 = DAG->getConstant(3, Loc, MVT::i32);
+  SDValue NonPow2_9 = DAG->getConstant(9, Loc, MVT::i32);
+
+  // Zero value
   SDValue NonPow2_0 = DAG->getConstant(0, Loc, MVT::i32);
 
   SDValue RotAmount = DAG->getConstant(3, Loc, MVT::i32);
 
-  SDValue RotlPow2_4 =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Pow2_4, RotAmount);
-  SDValue RotlPow2_8 =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Pow2_8, RotAmount);
-  SDValue RotlNonPow2_5 =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, NonPow2_5, RotAmount);
-  SDValue RotlZero =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, NonPow2_0, RotAmount);
+  SDValue Cond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i1);
 
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2_4));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2_8));
+  SDValue Sel_pow2 =
+      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, Pow2_4, Pow2_8);
+  SDValue Sel_non_pow2 =
+      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, NonPow2_3, NonPow2_9);
 
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlNonPow2_5));
+  SDValue RotlPow2 =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Sel_pow2, RotAmount);
+  SDValue RotlNonPow2 =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Sel_non_pow2, RotAmount);
+  SDValue RotlZero =
+      DAG->getNode(ISD::ROTL, Loc, MVT::i32, NonPow2_0, RotAmount);
 
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2));
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlNonPow2));
   EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlZero));
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlZero, /*OrZero=*/true));
 
   // Also verify DemandedElts is forwarded through ROTL for vector lanes.
   MVT::SimpleValueType VecVT = MVT::v2i32;
   SDValue PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_4, NonPow2_0});
+  SDValue PowAndNonPow = DAG->getBuildVector(VecVT, Loc, {Pow2_8, NonPow2_0});
+
   SDValue RotAmountVec =
       DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
-  SDValue RotlPowAndZero =
-      DAG->getNode(ISD::ROTL, Loc, VecVT, PowAndZero, RotAmountVec);
+
+  SDValue VecCond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 2, MVT::v2i1);
+  SDValue Sel_pow2_vec =
+      DAG->getNode(ISD::VSELECT, Loc, VecVT, VecCond, PowAndZero, PowAndNonPow);
+
+  SDValue Rotl =
+      DAG->getNode(ISD::ROTL, Loc, VecVT, Sel_pow2_vec, RotAmountVec);
+
   APInt DemandAll(2, 3);
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll));
-  EXPECT_TRUE(
-      DAG->isKnownToBeAPowerOfTwo(RotlPowAndZero, DemandAll, /*OrZero=*/true));
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandAll));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandAll, /*OrZero=*/true));
+
+  APInt DemandLo(2, 1);
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandLo));
+
+  APInt DemandHi(2, 2);
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandHi));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandHi, /*OrZero=*/true));
 }
 
 TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
   SDLoc Loc;
 
   SDValue Pow2_16 = DAG->getConstant(16, Loc, MVT::i32);
+  SDValue Pow2_8 = DAG->getConstant(8, Loc, MVT::i32);
   SDValue NonPow2_6 = DAG->getConstant(6, Loc, MVT::i32);
+  SDValue NonPow2_9 = DAG->getConstant(9, Loc, MVT::i32);
   SDValue Zero = DAG->getConstant(0, Loc, MVT::i32);
 
   SDValue RotAmount = DAG->getConstant(5, Loc, MVT::i32);
 
-  SDValue RotrPow2 = DAG->getNode(ISD::ROTR, Loc, MVT::i32, Pow2_16, RotAmount);
+  SDValue Cond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 3, MVT::i1);
+
+  SDValue Sel_pow2 =
+      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, Pow2_16, Pow2_8);
+  SDValue Sel_non_pow2 =
+      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, NonPow2_6, NonPow2_9);
+
+  SDValue RotrPow2 =
+      DAG->getNode(ISD::ROTR, Loc, MVT::i32, Sel_pow2, RotAmount);
   SDValue RotrNonPow2 =
-      DAG->getNode(ISD::ROTR, Loc, MVT::i32, NonPow2_6, RotAmount);
+      DAG->getNode(ISD::ROTR, Loc, MVT::i32, Sel_non_pow2, RotAmount);
   SDValue RotrZero = DAG->getNode(ISD::ROTR, Loc, MVT::i32, Zero, RotAmount);
 
   EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrPow2));
@@ -1235,14 +1263,27 @@ TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
 
   MVT::SimpleValueType VecVT = MVT::v2i32;
   SDValue PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_16, Zero});
+  SDValue PowAndNonPow = DAG->getBuildVector(VecVT, Loc, {Pow2_8, Zero});
+
   SDValue RotAmountVec =
       DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
-  SDValue RotrPowAndZero =
-      DAG->getNode(ISD::ROTR, Loc, VecVT, PowAndZero, RotAmountVec);
+
+  SDValue VecCond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 4, MVT::v2i1);
+  SDValue Sel_pow2_vec =
+      DAG->getNode(ISD::VSELECT, Loc, VecVT, VecCond, PowAndZero, PowAndNonPow);
+
+  SDValue Rotr =
+      DAG->getNode(ISD::ROTR, Loc, VecVT, Sel_pow2_vec, RotAmountVec);
   APInt DemandAll(2, 3);
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll));
-  EXPECT_TRUE(
-      DAG->isKnownToBeAPowerOfTwo(RotrPowAndZero, DemandAll, /*OrZero=*/true));
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandAll));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandAll, /*OrZero=*/true));
+
+  APInt DemandLo(2, 1);
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandLo));
+
+  APInt DemandHi(2, 2);
+  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandHi));
+  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandHi, /*OrZero=*/true));
 }
 
 TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) {

>From ee5726e08c5199724a7ea4bad3a652c28e2b13e5 Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Mon, 2 Mar 2026 14:52:24 +0530
Subject: [PATCH 06/11] Add test for handling DemandedElts for vectors in ROTL
 and ROTR

---
 llvm/test/CodeGen/X86/known-pow2.ll | 91 +++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 25 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 91af45a3f357c..b5e98f637609a 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -11,8 +11,6 @@ declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i32 @llvm.fshr.i32(i32, i32, i32)
-declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
-declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
 
 define <4 x i32> @pow2_non_splat_vec(<4 x i32> %x) {
 ; CHECK-LABEL: pow2_non_splat_vec:
@@ -1009,31 +1007,74 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
   ret i32 %r
 }
 
-define <2 x i32> @pow2_rotl_vec() {
-; CHECK-LABEL: pow2_rotl_vec:
-; CHECK:       # %bb.0:
-; CHECK:       xmm0 = [32,0,0,0]
-; CHECK:       retq
+define i1 @pow2_rotl_extract_vec(<2 x i32> %a0, i32 %rotamt, i32 %x) {
+; CHECK-LABEL: pow2_rotl_extract_vec:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    roll %cl, %eax
+; CHECK-NEXT:    notl %esi
+; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
 entry:
-  ; build vector <4,0>
-  %v0 = insertelement <2 x i32> zeroinitializer, i32 4, i32 0
-  %v1 = insertelement <2 x i32> %v0, i32 0, i32 1
-  %amt0 = insertelement <2 x i32> zeroinitializer, i32 3, i32 0
-  %amt1 = insertelement <2 x i32> %amt0, i32 3, i32 1
-  %r = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %v1, <2 x i32> %v1, <2 x i32> %amt1)
-  ret <2 x i32> %r
+  %cmp = icmp sgt <2 x i32> zeroinitializer, %a0
+
+  %powvec = select <2 x i1> %cmp,
+                     <2 x i32> <i32 1024, i32 1024>,
+                     <2 x i32> <i32 4096, i32 4096>
+
+  %base = extractelement <2 x i32> %powvec, i32 0
+
+  %d = call i32 @llvm.fshl.i32(i32 %base,
+                               i32 %base,
+                               i32 %rotamt)
+
+  %and = and i32 %x, %d
+  %r = icmp eq i32 %and, %d
+
+  ret i1 %r
 }
 
-define <2 x i32> @pow2_rotr_vec() {
-; CHECK-LABEL: pow2_rotr_vec:
-; CHECK:       # %bb.0:
-; CHECK:       xmm0 = [2147483648,0,0,0]
-; CHECK:       retq
+define i1 @pow2_rotr_extract_vec(<2 x i32> %a0, i32 %rotamt, i32 %x) {
+; CHECK-LABEL: pow2_rotr_extract_vec:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    rorl %cl, %eax
+; CHECK-NEXT:    notl %esi
+; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
 entry:
-  %v0 = insertelement <2 x i32> zeroinitializer, i32 16, i32 0
-  %v1 = insertelement <2 x i32> %v0, i32 0, i32 1
-  %amt0 = insertelement <2 x i32> zeroinitializer, i32 5, i32 0
-  %amt1 = insertelement <2 x i32> %amt0, i32 5, i32 1
-  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %v1, <2 x i32> %v1, <2 x i32> %amt1)
-  ret <2 x i32> %r
+
+  %cmp = icmp sgt <2 x i32> zeroinitializer, %a0
+  %powvec = select <2 x i1> %cmp,
+                     <2 x i32> <i32 1024, i32 1024>,
+                     <2 x i32> <i32 4096, i32 4096>
+
+  %base = extractelement <2 x i32> %powvec, i32 0
+
+  %d = call i32 @llvm.fshr.i32(i32 %base,
+                               i32 %base,
+                               i32 %rotamt)
+
+  %and = and i32 %x, %d
+  %r = icmp eq i32 %and, %d
+
+  ret i1 %r
 }

>From 7ee0aa510616769719af3e8012951d9ac917352f Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Tue, 3 Mar 2026 11:26:50 +0530
Subject: [PATCH 07/11] Perform 'rot' on vectors and remove unittests

---
 llvm/test/CodeGen/X86/known-pow2.ll           | 116 ++++++++++--------
 .../AArch64/AArch64SelectionDAGTest.cpp       | 114 -----------------
 2 files changed, 63 insertions(+), 167 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index b5e98f637609a..8347385453e75 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -11,6 +11,8 @@ declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <4 x i32> @pow2_non_splat_vec(<4 x i32> %x) {
 ; CHECK-LABEL: pow2_non_splat_vec:
@@ -1007,74 +1009,82 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
   ret i32 %r
 }
 
-define i1 @pow2_rotl_extract_vec(<2 x i32> %a0, i32 %rotamt, i32 %x) {
+define i1 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
 ; CHECK-LABEL: pow2_rotl_extract_vec:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
-; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT:    movl $4096, %eax # imm = 0x1000
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movl $1024, %eax # imm = 0x400
+; CHECK-NEXT:    movd %eax, %xmm3
+; CHECK-NEXT:    pand %xmm2, %xmm3
+; CHECK-NEXT:    pandn %xmm0, %xmm2
+; CHECK-NEXT:    por %xmm2, %xmm3
+; CHECK-NEXT:    pslld $23, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    cvttps2dq %xmm1, %xmm0
+; CHECK-NEXT:    pmuludq %xmm3, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    por %xmm0, %xmm1
 ; CHECK-NEXT:    movd %xmm1, %eax
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    roll %cl, %eax
-; CHECK-NEXT:    notl %esi
-; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    testl %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
-entry:
-  %cmp = icmp sgt <2 x i32> zeroinitializer, %a0
-
-  %powvec = select <2 x i1> %cmp,
-                     <2 x i32> <i32 1024, i32 1024>,
-                     <2 x i32> <i32 4096, i32 4096>
-
-  %base = extractelement <2 x i32> %powvec, i32 0
-
-  %d = call i32 @llvm.fshl.i32(i32 %base,
-                               i32 %base,
-                               i32 %rotamt)
-
-  %and = and i32 %x, %d
-  %r = icmp eq i32 %and, %d
 
+  %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+  %powvec = select <4 x i1> %cmp,
+                     <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>,
+                     <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
+  %d = call <4 x i32> @llvm.fshl.v4i32(
+           <4 x i32> %powvec,
+           <4 x i32> %powvec,
+           <4 x i32> %rotamt)
+  %elt = extractelement <4 x i32> %d, i32 0
+  %and = and i32 %x, %elt
+  %r = icmp eq i32 %and, %elt
   ret i1 %r
 }
 
-define i1 @pow2_rotr_extract_vec(<2 x i32> %a0, i32 %rotamt, i32 %x) {
+
+define i1 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
 ; CHECK-LABEL: pow2_rotr_extract_vec:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
-; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm3, %xmm3
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT:    movl $4096, %eax # imm = 0x1000
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movl $1024, %eax # imm = 0x400
+; CHECK-NEXT:    movd %eax, %xmm4
+; CHECK-NEXT:    pand %xmm3, %xmm4
+; CHECK-NEXT:    pandn %xmm0, %xmm3
+; CHECK-NEXT:    por %xmm3, %xmm4
+; CHECK-NEXT:    psubd %xmm1, %xmm2
+; CHECK-NEXT:    pslld $23, %xmm2
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT:    cvttps2dq %xmm2, %xmm0
+; CHECK-NEXT:    pmuludq %xmm4, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    por %xmm0, %xmm1
 ; CHECK-NEXT:    movd %xmm1, %eax
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    rorl %cl, %eax
-; CHECK-NEXT:    notl %esi
-; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    testl %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
-entry:
-
-  %cmp = icmp sgt <2 x i32> zeroinitializer, %a0
-  %powvec = select <2 x i1> %cmp,
-                     <2 x i32> <i32 1024, i32 1024>,
-                     <2 x i32> <i32 4096, i32 4096>
-
-  %base = extractelement <2 x i32> %powvec, i32 0
-
-  %d = call i32 @llvm.fshr.i32(i32 %base,
-                               i32 %base,
-                               i32 %rotamt)
-
-  %and = and i32 %x, %d
-  %r = icmp eq i32 %and, %d
-
+  %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
+  %powvec = select <4 x i1> %cmp,
+                     <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>,
+                     <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
+  %d = call <4 x i32> @llvm.fshr.v4i32(
+           <4 x i32> %powvec,
+           <4 x i32> %powvec,
+           <4 x i32> %rotamt)
+  %elt = extractelement <4 x i32> %d, i32 0
+  %and = and i32 %x, %elt
+  %r = icmp eq i32 %and, %elt
   ret i1 %r
 }
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index 4addfe97d98c1..b0c48e8c97995 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -1172,120 +1172,6 @@ TEST_F(AArch64SelectionDAGTest,
   EXPECT_EQ(SplatIdx, 0);
 }
 
-TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTL) {
-  SDLoc Loc;
-  // Power-of-two values
-  SDValue Pow2_4 = DAG->getConstant(4, Loc, MVT::i32);
-  SDValue Pow2_8 = DAG->getConstant(8, Loc, MVT::i32);
-  // Non-power-of-two values
-  SDValue NonPow2_3 = DAG->getConstant(3, Loc, MVT::i32);
-  SDValue NonPow2_9 = DAG->getConstant(9, Loc, MVT::i32);
-
-  // Zero value
-  SDValue NonPow2_0 = DAG->getConstant(0, Loc, MVT::i32);
-
-  SDValue RotAmount = DAG->getConstant(3, Loc, MVT::i32);
-
-  SDValue Cond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 1, MVT::i1);
-
-  SDValue Sel_pow2 =
-      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, Pow2_4, Pow2_8);
-  SDValue Sel_non_pow2 =
-      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, NonPow2_3, NonPow2_9);
-
-  SDValue RotlPow2 =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Sel_pow2, RotAmount);
-  SDValue RotlNonPow2 =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, Sel_non_pow2, RotAmount);
-  SDValue RotlZero =
-      DAG->getNode(ISD::ROTL, Loc, MVT::i32, NonPow2_0, RotAmount);
-
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlPow2));
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlNonPow2));
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotlZero));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotlZero, /*OrZero=*/true));
-
-  // Also verify DemandedElts is forwarded through ROTL for vector lanes.
-  MVT::SimpleValueType VecVT = MVT::v2i32;
-  SDValue PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_4, NonPow2_0});
-  SDValue PowAndNonPow = DAG->getBuildVector(VecVT, Loc, {Pow2_8, NonPow2_0});
-
-  SDValue RotAmountVec =
-      DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
-
-  SDValue VecCond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 2, MVT::v2i1);
-  SDValue Sel_pow2_vec =
-      DAG->getNode(ISD::VSELECT, Loc, VecVT, VecCond, PowAndZero, PowAndNonPow);
-
-  SDValue Rotl =
-      DAG->getNode(ISD::ROTL, Loc, VecVT, Sel_pow2_vec, RotAmountVec);
-
-  APInt DemandAll(2, 3);
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandAll));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandAll, /*OrZero=*/true));
-
-  APInt DemandLo(2, 1);
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandLo));
-
-  APInt DemandHi(2, 2);
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandHi));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotl, DemandHi, /*OrZero=*/true));
-}
-
-TEST_F(AArch64SelectionDAGTest, KnownToBeAPowerOfTwo_ROTR) {
-  SDLoc Loc;
-
-  SDValue Pow2_16 = DAG->getConstant(16, Loc, MVT::i32);
-  SDValue Pow2_8 = DAG->getConstant(8, Loc, MVT::i32);
-  SDValue NonPow2_6 = DAG->getConstant(6, Loc, MVT::i32);
-  SDValue NonPow2_9 = DAG->getConstant(9, Loc, MVT::i32);
-  SDValue Zero = DAG->getConstant(0, Loc, MVT::i32);
-
-  SDValue RotAmount = DAG->getConstant(5, Loc, MVT::i32);
-
-  SDValue Cond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 3, MVT::i1);
-
-  SDValue Sel_pow2 =
-      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, Pow2_16, Pow2_8);
-  SDValue Sel_non_pow2 =
-      DAG->getNode(ISD::SELECT, Loc, MVT::i32, Cond, NonPow2_6, NonPow2_9);
-
-  SDValue RotrPow2 =
-      DAG->getNode(ISD::ROTR, Loc, MVT::i32, Sel_pow2, RotAmount);
-  SDValue RotrNonPow2 =
-      DAG->getNode(ISD::ROTR, Loc, MVT::i32, Sel_non_pow2, RotAmount);
-  SDValue RotrZero = DAG->getNode(ISD::ROTR, Loc, MVT::i32, Zero, RotAmount);
-
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrPow2));
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrNonPow2));
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(RotrZero));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(RotrZero, /*OrZero=*/true));
-
-  MVT::SimpleValueType VecVT = MVT::v2i32;
-  SDValue PowAndZero = DAG->getBuildVector(VecVT, Loc, {Pow2_16, Zero});
-  SDValue PowAndNonPow = DAG->getBuildVector(VecVT, Loc, {Pow2_8, Zero});
-
-  SDValue RotAmountVec =
-      DAG->getBuildVector(VecVT, Loc, {RotAmount, RotAmount});
-
-  SDValue VecCond = DAG->getCopyFromReg(DAG->getEntryNode(), Loc, 4, MVT::v2i1);
-  SDValue Sel_pow2_vec =
-      DAG->getNode(ISD::VSELECT, Loc, VecVT, VecCond, PowAndZero, PowAndNonPow);
-
-  SDValue Rotr =
-      DAG->getNode(ISD::ROTR, Loc, VecVT, Sel_pow2_vec, RotAmountVec);
-  APInt DemandAll(2, 3);
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandAll));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandAll, /*OrZero=*/true));
-
-  APInt DemandLo(2, 1);
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandLo));
-
-  APInt DemandHi(2, 2);
-  EXPECT_FALSE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandHi));
-  EXPECT_TRUE(DAG->isKnownToBeAPowerOfTwo(Rotr, DemandHi, /*OrZero=*/true));
-}
-
 TEST_F(AArch64SelectionDAGTest, getRepeatedSequence_Patterns) {
   TargetLowering TL(*TM, *STI);
 

>From e6fe1d077ab49666401d6ba3bf5e304195db4f8b Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Tue, 3 Mar 2026 14:24:00 +0530
Subject: [PATCH 08/11] reformat: ignore col80 and use single lines

---
 llvm/test/CodeGen/X86/known-pow2.ll | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 8347385453e75..3f9886333f59a 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1011,7 +1011,7 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
 
 define i1 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
 ; CHECK-LABEL: pow2_rotl_extract_vec:
-; CHECK:       # %bb.0: # %entry
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
 ; CHECK-NEXT:    movl $4096, %eax # imm = 0x1000
@@ -1035,13 +1035,8 @@ define i1 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
 ; CHECK-NEXT:    retq
 
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
-  %powvec = select <4 x i1> %cmp,
-                     <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>,
-                     <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
-  %d = call <4 x i32> @llvm.fshl.v4i32(
-           <4 x i32> %powvec,
-           <4 x i32> %powvec,
-           <4 x i32> %rotamt)
+  %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
+  %d = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %powvec, <4 x i32> %powvec, <4 x i32> %rotamt)
   %elt = extractelement <4 x i32> %d, i32 0
   %and = and i32 %x, %elt
   %r = icmp eq i32 %and, %elt
@@ -1076,13 +1071,8 @@ define i1 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
-  %powvec = select <4 x i1> %cmp,
-                     <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>,
-                     <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
-  %d = call <4 x i32> @llvm.fshr.v4i32(
-           <4 x i32> %powvec,
-           <4 x i32> %powvec,
-           <4 x i32> %rotamt)
+  %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
+  %d = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %powvec, <4 x i32> %powvec, <4 x i32> %rotamt)
   %elt = extractelement <4 x i32> %d, i32 0
   %and = and i32 %x, %elt
   %r = icmp eq i32 %and, %elt

>From a95653f6656693cd9fac836dddb60dc14728512c Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Tue, 3 Mar 2026 16:12:14 +0530
Subject: [PATCH 09/11] Store the vector to avoid scalarisation, use 'urem' for
 comparison

---
 llvm/test/CodeGen/X86/known-pow2.ll | 86 +++++++++++++++++------------
 1 file changed, 50 insertions(+), 36 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 3f9886333f59a..18ecd95ea3578 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -1009,72 +1009,86 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
   ret i32 %r
 }
 
-define i1 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
+define i32 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr %p) {
 ; CHECK-LABEL: pow2_rotl_extract_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
-; CHECK-NEXT:    movl $4096, %eax # imm = 0x1000
-; CHECK-NEXT:    movd %eax, %xmm0
-; CHECK-NEXT:    movl $1024, %eax # imm = 0x400
-; CHECK-NEXT:    movd %eax, %xmm3
-; CHECK-NEXT:    pand %xmm2, %xmm3
-; CHECK-NEXT:    pandn %xmm0, %xmm2
-; CHECK-NEXT:    por %xmm2, %xmm3
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT:    por %xmm0, %xmm2
 ; CHECK-NEXT:    pslld $23, %xmm1
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-NEXT:    cvttps2dq %xmm1, %xmm0
-; CHECK-NEXT:    pmuludq %xmm3, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    movd %xmm1, %eax
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    testl %edi, %eax
-; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT:    pmuludq %xmm0, %xmm2
+; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT:    pmuludq %xmm1, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    por %xmm3, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, (%rsi)
+; CHECK-NEXT:    movd %xmm1, %ecx
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    retq
 
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
   %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
   %d = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %powvec, <4 x i32> %powvec, <4 x i32> %rotamt)
+  store <4 x i32> %d, ptr %p
   %elt = extractelement <4 x i32> %d, i32 0
-  %and = and i32 %x, %elt
-  %r = icmp eq i32 %and, %elt
-  ret i1 %r
+  %res = urem i32 %x, %elt
+  ret i32 %res
 }
 
 
-define i1 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x) {
+define i32 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr %p) {
 ; CHECK-LABEL: pow2_rotr_extract_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 ; CHECK-NEXT:    pxor %xmm3, %xmm3
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm3
-; CHECK-NEXT:    movl $4096, %eax # imm = 0x1000
-; CHECK-NEXT:    movd %eax, %xmm0
-; CHECK-NEXT:    movl $1024, %eax # imm = 0x400
-; CHECK-NEXT:    movd %eax, %xmm4
-; CHECK-NEXT:    pand %xmm3, %xmm4
-; CHECK-NEXT:    pandn %xmm0, %xmm3
-; CHECK-NEXT:    por %xmm3, %xmm4
+; CHECK-NEXT:    movdqa %xmm3, %xmm0
+; CHECK-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT:    por %xmm0, %xmm3
 ; CHECK-NEXT:    psubd %xmm1, %xmm2
 ; CHECK-NEXT:    pslld $23, %xmm2
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
 ; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
 ; CHECK-NEXT:    cvttps2dq %xmm2, %xmm0
-; CHECK-NEXT:    pmuludq %xmm4, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    movd %xmm1, %eax
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    testl %edi, %eax
-; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; CHECK-NEXT:    pmuludq %xmm0, %xmm3
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT:    pmuludq %xmm1, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    por %xmm2, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, (%rsi)
+; CHECK-NEXT:    movd %xmm1, %ecx
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
   %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>
   %d = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %powvec, <4 x i32> %powvec, <4 x i32> %rotamt)
+  store <4 x i32> %d, ptr %p
   %elt = extractelement <4 x i32> %d, i32 0
-  %and = and i32 %x, %elt
-  %r = icmp eq i32 %and, %elt
-  ret i1 %r
+  %res = urem i32 %x, %elt
+  ret i32 %res
 }

>From 446555d11fcc5107d8f997b9ac3bfa8720c01196 Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Tue, 3 Mar 2026 16:21:57 +0530
Subject: [PATCH 10/11] update known-pow2.ll with update_llc_test_checks.py

---
 llvm/test/CodeGen/X86/known-pow2.ll | 64 +++++++++++++++++------------
 1 file changed, 37 insertions(+), 27 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index 18ecd95ea3578..a6f575a381872 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -11,8 +11,6 @@ declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i32 @llvm.fshr.i32(i32, i32, i32)
-declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <4 x i32> @pow2_non_splat_vec(<4 x i32> %x) {
 ; CHECK-LABEL: pow2_non_splat_vec:
@@ -30,16 +28,16 @@ define <4 x i32> @pow2_non_splat_vec_fail0(<4 x i32> %x) {
 ; CHECK-NEXT:    pmuludq %xmm0, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1073741824,1073741824,67108864,67108864]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1073741824,u,67108864,u]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
-; CHECK-NEXT:    movdqa %xmm1, %xmm4
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; CHECK-NEXT:    psrld $1, %xmm1
-; CHECK-NEXT:    movss {{.*#+}} xmm4 = xmm1[0],xmm4[1,2,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [9,4,16,64]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; CHECK-NEXT:    movdqa %xmm1, %xmm3
+; CHECK-NEXT:    psrld $1, %xmm3
+; CHECK-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [9,4,16,64]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,4,64,64]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,u,64,u]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; CHECK-NEXT:    psubd %xmm1, %xmm0
@@ -51,6 +49,7 @@ define <4 x i32> @pow2_non_splat_vec_fail0(<4 x i32> %x) {
 define i32 @pow2_extractelt_vec(<4 x i32> %a0, ptr %p1, i32 %a2) {
 ; CHECK-LABEL: pow2_extractelt_vec:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    pxor %xmm1, %xmm1
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, %xmm0
@@ -58,9 +57,10 @@ define i32 @pow2_extractelt_vec(<4 x i32> %a0, ptr %p1, i32 %a2) {
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-NEXT:    por %xmm0, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, (%rdi)
-; CHECK-NEXT:    movd %xmm1, %eax
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    movd %xmm1, %ecx
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
   %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
@@ -831,11 +831,16 @@ define <4 x i32> @pow2_and_vector(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-NEXT:    cvttps2dq %xmm1, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,u]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2,2,u,u]
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
-; CHECK-NEXT:    andps %xmm1, %xmm0
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,9]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2,u,9,u]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    psubd %xmm1, %xmm2
+; CHECK-NEXT:    pand %xmm1, %xmm2
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,1,0]
+; CHECK-NEXT:    pand %xmm1, %xmm0
 ; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %yy = shl nuw nsw <4 x i32> <i32 1, i32 2, i32 4, i32 9>, %y
@@ -852,10 +857,14 @@ define i1 @pow2_and_fail0(i32 %x, i32 %y) {
 ; CHECK-LABEL: pow2_and_fail0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    movl $4, %eax
 ; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %edi
-; CHECK-NEXT:    testb $4, %dil
+; CHECK-NEXT:    shll %cl, %eax
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    negl %ecx
+; CHECK-NEXT:    andl %eax, %ecx
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    testl %edi, %ecx
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %yy = shl i32 4, %y
@@ -979,11 +988,12 @@ define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
 define i32 @pow2_blsi_add(i32 %x, i32 %a) {
 ; CHECK-LABEL: pow2_blsi_add:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    negl %eax
-; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    negl %ecx
+; CHECK-NEXT:    andl %esi, %ecx
+; CHECK-NEXT:    leal (%rdi,%rcx), %eax
+; CHECK-NEXT:    andl %ecx, %eax
 ; CHECK-NEXT:    retq
   %neg_a = sub i32 0, %a
   %y = and i32 %a, %neg_a
@@ -999,7 +1009,7 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    subl %eax, %edi
 ; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %neg_a = sub i32 0, %a

>From f9c2d031ddd867d792971184b28db431f0776a45 Mon Sep 17 00:00:00 2001
From: Immad Mir <mirimmad17 at gmail.com>
Date: Wed, 4 Mar 2026 18:02:54 +0530
Subject: [PATCH 11/11] update known-pow2.ll with update_llc_test_checks.py

---
 llvm/test/CodeGen/X86/known-pow2.ll | 78 ++++++++++++-----------------
 1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index a6f575a381872..940bb62118abd 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -28,16 +28,16 @@ define <4 x i32> @pow2_non_splat_vec_fail0(<4 x i32> %x) {
 ; CHECK-NEXT:    pmuludq %xmm0, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1073741824,u,67108864,u]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1073741824,1073741824,67108864,67108864]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; CHECK-NEXT:    movdqa %xmm1, %xmm3
-; CHECK-NEXT:    psrld $1, %xmm3
-; CHECK-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [9,4,16,64]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-NEXT:    movdqa %xmm1, %xmm4
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; CHECK-NEXT:    psrld $1, %xmm1
+; CHECK-NEXT:    movss {{.*#+}} xmm4 = xmm1[0],xmm4[1,2,3]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [9,4,16,64]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,u,64,u]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [4,4,64,64]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; CHECK-NEXT:    psubd %xmm1, %xmm0
@@ -49,7 +49,6 @@ define <4 x i32> @pow2_non_splat_vec_fail0(<4 x i32> %x) {
 define i32 @pow2_extractelt_vec(<4 x i32> %a0, ptr %p1, i32 %a2) {
 ; CHECK-LABEL: pow2_extractelt_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    pxor %xmm1, %xmm1
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, %xmm0
@@ -57,10 +56,9 @@ define i32 @pow2_extractelt_vec(<4 x i32> %a0, ptr %p1, i32 %a2) {
 ; CHECK-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-NEXT:    por %xmm0, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, (%rdi)
-; CHECK-NEXT:    movd %xmm1, %ecx
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %ecx
-; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %esi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
   %sel = select <4 x i1> %cmp, <4 x i32> <i32 4, i32 2, i32 1, i32 0>, <4 x i32> <i32 8, i32 4, i32 2, i32 -1>
@@ -831,16 +829,11 @@ define <4 x i32> @pow2_and_vector(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-NEXT:    cvttps2dq %xmm1, %xmm1
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,9]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2,u,9,u]
-; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; CHECK-NEXT:    pxor %xmm2, %xmm2
-; CHECK-NEXT:    psubd %xmm1, %xmm2
-; CHECK-NEXT:    pand %xmm1, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,1,0]
-; CHECK-NEXT:    pand %xmm1, %xmm0
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,u]
+; CHECK-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2,2,u,u]
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %yy = shl nuw nsw <4 x i32> <i32 1, i32 2, i32 4, i32 9>, %y
@@ -857,14 +850,10 @@ define i1 @pow2_and_fail0(i32 %x, i32 %y) {
 ; CHECK-LABEL: pow2_and_fail0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    movl $4, %eax
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shll %cl, %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    andl %eax, %ecx
 ; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    testl %edi, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %edi
+; CHECK-NEXT:    testb $4, %dil
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %yy = shl i32 4, %y
@@ -988,12 +977,11 @@ define i32 @pow2_blsi_add_fail(i32 %x, i32 %a) {
 define i32 @pow2_blsi_add(i32 %x, i32 %a) {
 ; CHECK-LABEL: pow2_blsi_add:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    movl %esi, %ecx
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    andl %esi, %ecx
-; CHECK-NEXT:    leal (%rdi,%rcx), %eax
-; CHECK-NEXT:    andl %ecx, %eax
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    notl %edi
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %neg_a = sub i32 0, %a
   %y = and i32 %a, %neg_a
@@ -1009,7 +997,7 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    negl %eax
 ; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    subl %eax, %edi
+; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %neg_a = sub i32 0, %a
@@ -1022,7 +1010,6 @@ define i32 @pow2_blsi_sub(i32 %x, i32 %a) {
 define i32 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr %p) {
 ; CHECK-LABEL: pow2_rotl_extract_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
 ; CHECK-NEXT:    movdqa %xmm2, %xmm0
@@ -1045,10 +1032,9 @@ define i32 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; CHECK-NEXT:    por %xmm3, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, (%rsi)
-; CHECK-NEXT:    movd %xmm1, %ecx
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %ecx
-; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
 
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
@@ -1064,7 +1050,6 @@ define i32 @pow2_rotl_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr
 define i32 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr %p) {
 ; CHECK-LABEL: pow2_rotr_extract_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    pxor %xmm2, %xmm2
 ; CHECK-NEXT:    pxor %xmm3, %xmm3
 ; CHECK-NEXT:    pcmpgtd %xmm0, %xmm3
@@ -1089,10 +1074,9 @@ define i32 @pow2_rotr_extract_vec(<4 x i32> %a0, <4 x i32> %rotamt, i32 %x, ptr
 ; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; CHECK-NEXT:    por %xmm2, %xmm1
 ; CHECK-NEXT:    movdqa %xmm1, (%rsi)
-; CHECK-NEXT:    movd %xmm1, %ecx
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %ecx
-; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    andl %edi, %eax
 ; CHECK-NEXT:    retq
   %cmp = icmp sgt <4 x i32> zeroinitializer, %a0
   %powvec = select <4 x i1> %cmp, <4 x i32> <i32 1024, i32 1235, i32 2048, i32 4096>, <4 x i32> <i32 4096, i32 5679, i32 8192, i32 16384>