[llvm] [InstCombine] Rotate transformation port from SelectionDAG to InstCombine (PR #160628)

Axel Sorenson via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 12:06:36 PDT 2025


https://github.com/axelcool1234 updated https://github.com/llvm/llvm-project/pull/160628

>From 56dd41cd1e42b8750bb0e6c8e05611f2b5f1c64c Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Wed, 24 Sep 2025 19:58:33 -0600
Subject: [PATCH 1/4] pre-commit

---
 .../InstCombine/InstCombineCalls.cpp          | 18 +++++
 llvm/test/Transforms/InstCombine/fsh.ll       | 75 +++++++++++++++++++
 2 files changed, 93 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6ad493772d170..bcbe28a1080c4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2405,6 +2405,24 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
               matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
                                      /*MatchBitReversals*/ true))
         return BitOp;
+
+      // R = fshl(X, X, C2)
+      // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
+      Value *InnerOp0;
+      Value *InnerOp1;
+      Constant *ShAmtInnerC;
+      if (match(Op0, m_FShl(m_Value(InnerOp0), m_Value(InnerOp1),
+                            m_ImmConstant(ShAmtInnerC))) &&
+          Op0 == Op1 && InnerOp0 == InnerOp1) {
+        APInt Sum =
+            ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger();
+        APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
+        if (Modulo.isZero())
+          return replaceInstUsesWith(*II, InnerOp0);
+        Constant *ModuloC = ConstantInt::get(Ty, Modulo);
+        return CallInst::Create(cast<IntrinsicInst>(Op0)->getCalledFunction(),
+                                {InnerOp0, InnerOp1, ModuloC});
+      }
     }
 
     // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 0325c60997dfd..334c613c9e8d1 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -1214,3 +1214,78 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) {
   %r = call i31 @llvm.fshr.i31(i31 %x, i31 %x, i31 %n)
   ret i31 %r
 }
+
+define i32 @rot_const_consecutive(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 13)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 27)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 27)
+  ret i32 %r2
+}
+
+define i32 @rot_const_consecutive_multi_use(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_multi_use(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 4)
+; CHECK-NEXT:    [[R2:%.*]] = and i32 [[R]], [[R3]]
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 4)
+  %and = and i32 %r, %r2
+  ret i32 %and
+}
+
+define i32 @rot_const_consecutive_cancel_out(i32 %x) {
+; CHECK-LABEL: @rot_const_consecutive_cancel_out(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X1:%.*]], i32 [[X1]], i32 7)
+; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 25)
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25)
+  ret i32 %r2
+}
+
+;; negative test, consecutive rotates only fold if shift amounts are const
+
+define i32 @rot_nonconst_shift(i32 %x, i32 %amt) {
+; CHECK-LABEL: @rot_nonconst_shift(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 [[AMT:%.*]])
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 %amt)
+  ret i32 %r2
+}
+
+;; negative test, 1st funnel shift isn't a rotate.
+
+define i32 @fsh_rot(i32 %x, i32 %y) {
+; CHECK-LABEL: @fsh_rot(
+; CHECK-NEXT:    [[FSH:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 7)
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[FSH]], i32 [[FSH]], i32 4)
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %fsh = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
+  %r = call i32 @llvm.fshl.i32(i32 %fsh, i32 %fsh, i32 4)
+  ret i32 %r
+}
+
+;; negative test, 2nd funnel shift isn't a rotate.
+
+define i32 @rot_fsh(i32 %x, i32 %y) {
+; CHECK-LABEL: @rot_fsh(
+; CHECK-NEXT:    [[Y:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[Y]], i32 [[R:%.*]], i32 4)
+; CHECK-NEXT:    ret i32 [[R2]]
+;
+  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
+  %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %y, i32 4)
+  ret i32 %r2
+}
+

>From 021e1b6498bd5c444a763a24a9018978ff903253 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Wed, 24 Sep 2025 20:03:29 -0600
Subject: [PATCH 2/4] The rotate transformation from
 https://github.com/llvm/llvm-project/blob/72c04bb882ad70230bce309c3013d9cc2c99e9a7/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L10312-L10337
 has no middle-end equivalent in InstCombine. The following is a port of that
 transformation to InstCombine.

---
 llvm/test/Transforms/InstCombine/fsh.ll | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 334c613c9e8d1..28c541e1a9eb2 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -1217,8 +1217,7 @@ define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) {
 
 define i32 @rot_const_consecutive(i32 %x) {
 ; CHECK-LABEL: @rot_const_consecutive(
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 13)
-; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 27)
+; CHECK-NEXT:    [[R2:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 8)
 ; CHECK-NEXT:    ret i32 [[R2]]
 ;
   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 13)
@@ -1229,7 +1228,7 @@ define i32 @rot_const_consecutive(i32 %x) {
 define i32 @rot_const_consecutive_multi_use(i32 %x) {
 ; CHECK-LABEL: @rot_const_consecutive_multi_use(
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 7)
-; CHECK-NEXT:    [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 4)
+; CHECK-NEXT:    [[R3:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 11)
 ; CHECK-NEXT:    [[R2:%.*]] = and i32 [[R]], [[R3]]
 ; CHECK-NEXT:    ret i32 [[R2]]
 ;
@@ -1241,9 +1240,7 @@ define i32 @rot_const_consecutive_multi_use(i32 %x) {
 
 define i32 @rot_const_consecutive_cancel_out(i32 %x) {
 ; CHECK-LABEL: @rot_const_consecutive_cancel_out(
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X1:%.*]], i32 [[X1]], i32 7)
-; CHECK-NEXT:    [[X:%.*]] = call i32 @llvm.fshl.i32(i32 [[R]], i32 [[R]], i32 25)
-; CHECK-NEXT:    ret i32 [[X]]
+; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 7)
   %r2 = call i32 @llvm.fshl.i32(i32 %r, i32 %r, i32 25)

>From 1b3e617369f4d44333b76124d76a3ac184f31c79 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Wed, 24 Sep 2025 21:31:43 -0600
Subject: [PATCH 3/4] added m_Deferred

---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bcbe28a1080c4..e4e356bfd2d72 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2408,20 +2408,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
       // R = fshl(X, X, C2)
       // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
-      Value *InnerOp0;
-      Value *InnerOp1;
+      Value *InnerOp;
       Constant *ShAmtInnerC;
-      if (match(Op0, m_FShl(m_Value(InnerOp0), m_Value(InnerOp1),
+      if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
                             m_ImmConstant(ShAmtInnerC))) &&
-          Op0 == Op1 && InnerOp0 == InnerOp1) {
+          Op0 == Op1) {
         APInt Sum =
             ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger();
         APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
         if (Modulo.isZero())
-          return replaceInstUsesWith(*II, InnerOp0);
+          return replaceInstUsesWith(*II, InnerOp);
         Constant *ModuloC = ConstantInt::get(Ty, Modulo);
         return CallInst::Create(cast<IntrinsicInst>(Op0)->getCalledFunction(),
-                                {InnerOp0, InnerOp1, ModuloC});
+                                {InnerOp, InnerOp, ModuloC});
       }
     }
 

>From 752eef061064eb64a1be1ce93160c275f4634362 Mon Sep 17 00:00:00 2001
From: Axel Sorenson <AxelPSorenson at gmail.com>
Date: Thu, 25 Sep 2025 13:06:27 -0600
Subject: [PATCH 4/4] Allow non-splat constant vectors

Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e4e356bfd2d72..0ee15a8cdf905 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2414,7 +2414,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
                             m_ImmConstant(ShAmtInnerC))) &&
           Op0 == Op1) {
         APInt Sum =
-            ShAmtC->getUniqueInteger() + ShAmtInnerC->getUniqueInteger();
+            *ShAmtOuterC + *ShAmtInnerC;
         APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
         if (Modulo.isZero())
           return replaceInstUsesWith(*II, InnerOp);



More information about the llvm-commits mailing list