[llvm] [InstCombine] Fold rotate patterns with ZExt/Trunc at different Points (PR #142578)

Thomas Saller via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 3 06:15:10 PDT 2025


https://github.com/sallto updated https://github.com/llvm/llvm-project/pull/142578

From d10b209daf7bed9e6832b6374459b05982850f57 Mon Sep 17 00:00:00 2001
From: sallto <thomas at saller.com.de>
Date: Mon, 19 May 2025 17:19:14 +0200
Subject: [PATCH 1/2] [InstCombine] Fold rotate patterns with ZExt/Trunc
 Rotation patterns now fold when a Trunc is involved or when the width
 changes at different points on the left and right side.
 https://alive2.llvm.org/ce/z/RkALLB
 Fixes #138334
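
 For example, the following IR (taken from the new
 rotateright_64_zext_double_conversion test in rotate.ll below) now folds
 into a single funnel-shift call, even though the left and right shift
 amounts are widened at different points:

   %z = zext i32 %y to i64
   %neg = sub nsw i32 0, %y
   %and2 = and i32 %neg, 63
   %conv = zext i32 %and2 to i64
   %shl = shl i64 %x, %conv
   %shr = lshr i64 %x, %z
   %or = or i64 %shr, %shl
   ; folds to: %or = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)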

---
 llvm/include/llvm/IR/PatternMatch.h           |   7 +
 .../InstCombine/InstCombineAndOrXor.cpp       |  58 +++++---
 llvm/test/Transforms/InstCombine/rotate.ll    | 137 ++++++++++++++++++
 llvm/unittests/IR/PatternMatch.cpp            |  33 +++++
 4 files changed, 217 insertions(+), 18 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 6242a686e7bc0..b3061e6d4ccdc 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2160,6 +2160,13 @@ m_ZExtOrSelf(const OpTy &Op) {
   return m_CombineOr(m_ZExt(Op), Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<CastInst_match<OpTy, TruncInst>,
+                        match_combine_or<CastInst_match<OpTy, ZExtInst>, OpTy>>
+m_TruncOrZExtOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_Trunc(Op), m_ZExtOrSelf(Op));
+}
+
 template <typename OpTy>
 inline match_combine_or<CastInst_match<OpTy, SExtInst>, OpTy>
 m_SExtOrSelf(const OpTy &Op) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 59b46ebdb72e2..65614d0c23720 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2969,31 +2969,53 @@ InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
       if (!isPowerOf2_32(Width))
         return nullptr;
 
-      // The shift amount may be masked with negation:
-      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+      // Check that L and R operate on the same value X. Since the bitwidth of
+      // X can differ from that of L and R, there are multiple possible
+      // locations for the ZExt or Trunc.
       Value *X;
+      const APInt *LMask = nullptr;
+      const APInt *RMask = nullptr;
       unsigned Mask = Width - 1;
-      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
-          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
-        return X;
+      // L is essentially a no-op except for changing the type of X.
+      // There are multiple possible patterns like X & LMask or ZExt/Trunc.
+      match(L, m_TruncOrZExtOrSelf(m_CombineOr(
+                   m_And(m_TruncOrZExtOrSelf(m_Value(X)), m_APInt(LMask)),
+                   m_Value(X))));
+
+      // R should be -X; sometimes (-X) & RMask is used instead, which is
+      // equivalent if RMask >= BitWidth - 1.
+      const Value *ValueToNegate = nullptr;
+      if (!match(R, m_TruncOrZExtOrSelf(m_CombineOr(
+                        m_And(m_Neg(m_Value(ValueToNegate)), m_APInt(RMask)),
+                        m_Neg(m_Value(ValueToNegate))))) ||
+          (RMask && RMask->ult(Mask)))
+        return nullptr;
 
-      // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
-      if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
-        return L;
+      // ValueToNegate can be L if the shift amount is masked with a
+      // bitwise-and before the rotate pattern.
+      if (!match(ValueToNegate, m_TruncOrZExtOrSelf(
+                                    m_CombineOr(m_Specific(X), m_Specific(L)))))
+        return nullptr;
 
-      // Similar to above, but the shift amount may be extended after masking,
-      // so return the extended value as the parameter for the intrinsic.
-      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-          match(R,
-                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
-                      m_SpecificInt(Mask))))
+      // L is a no-op and is guaranteed to have the same type as the rotate,
+      // so we reuse the existing ZExt/Trunc.
+      if (!LMask)
         return L;
 
-      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
-          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
-        return L;
+      // We can still fold with an LMask < Mask if R solely depends on L (not
+      // on X directly).
+      if (LMask->ult(Mask))
+        return (match(ValueToNegate, m_TruncOrZExtOrSelf(m_Specific(L))))
+                   ? L
+                   : nullptr;
 
-      return nullptr;
+      // X has the same width as L and LMask >= BitWidth - 1, so L is a no-op.
+      Value *MatchedX;
+      if (match(L, m_And(m_Value(MatchedX), m_Value())))
+        return MatchedX;
+
+      // L is ZExt(And(...)), so we can't reuse the ZExt/Trunc.
+      return L;
     };
 
     Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index a4d4ec375954f..3c416699f8dd7 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -698,6 +698,104 @@ define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
   ret i64 %10
 }
 
+define i64 @rotateright_64_zext_double_conversion(i64 %x, i32 %y) {
+; CHECK-LABEL: @rotateright_64_zext_double_conversion(
+; CHECK-NEXT:    [[Z:%.*]] = zext nneg i32 [[Y:%.*]] to i64
+; CHECK-NEXT:    [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
+; CHECK-NEXT:    ret i64 [[OR]]
+;
+  %z = zext i32 %y to i64
+  %neg = sub nsw i32 0, %y
+  %and2 = and i32 %neg, 63
+  %conv = zext i32 %and2 to i64
+  %shl = shl i64 %x, %conv
+  %shr = lshr i64 %x, %z
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+define i32 @rotateright_32_trunc_early(i32 %x, i64 %y) {
+; CHECK-LABEL: @rotateright_32_trunc_early(
+; CHECK-NEXT:    [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %z = trunc i64 %y to i32
+  %neg = sub nsw i32 0, %z
+  %and2 = and i32 %neg, 31
+  %shl = shl i32 %x, %and2
+  %shr = lshr i32 %x, %z
+  %or = or i32 %shr, %shl
+  ret i32 %or
+}
+
+define i32 @rotateright_32_trunc_neg_mask_amount(i32 %x, i64 %y) {
+; CHECK-LABEL: @rotateright_32_trunc_neg_mask_amount(
+; CHECK-NEXT:    [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %z = trunc i64 %y to i32
+  %neg = sub i64 0, %y
+  %and2 = and i64 %neg, 31
+  %conv = trunc i64 %and2 to i32
+  %shl = shl i32 %x, %conv
+  %shr = lshr i32 %x, %z
+  %or = or i32 %shr, %shl
+  ret i32 %or
+}
+
+; restrict the shift amount before rotating
+
+define i32 @rotateleft_32_restricted_shamt(i32 %x, i32 %shAmt) {
+; CHECK-LABEL: @rotateleft_32_restricted_shamt(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 30
+; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 [[AND]])
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and = and i32 %x, 30
+  %shl = shl i32 %x, %and
+  %sub = sub i32 0, %and
+  %shr = lshr i32 %x, %sub
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+; unnecessarily large 'and' masks
+
+define i32 @rotateleft_32_non_restricted_shamt(i32 %x, i32 %t) {
+; CHECK-LABEL: @rotateleft_32_non_restricted_shamt(
+; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[T:%.*]])
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %and = and i32 %t, 31
+  %shl = shl i32 %x, %and
+  %sub = sub nsw i32 0, %and
+  %and2 = and i32 %sub, 31
+  %shr = lshr i32 %x, %and2
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+; negative test - right-hand 'and' mask is too small (should be >= 31)
+
+define i32 @rotateleft_32_incorrect_right_mask(i32 %x, i32 %t) {
+; CHECK-LABEL: @rotateleft_32_incorrect_right_mask(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[T:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[T]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], 30
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[AND]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %shl = shl i32 %x, %t
+  %sub = sub nsw i32 0, %t
+  %and = and i32 %sub, 30
+  %shr = lshr i32 %x, %and
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
 ; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
 
 define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
@@ -1086,3 +1184,42 @@ define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
   %r = add i32 %shr, %shl
   ret i32 %r
 }
+
+; multi-use tests
+define i32 @rotateleft_32_use_zext(i32 %x, i16 %shAmt) {
+; CHECK-LABEL: @rotateleft_32_use_zext(
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    call void @use(i32 [[CONV]])
+; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[CONV]])
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %conv = zext i16 %shAmt to i32
+  call void @use(i32 %conv)
+  %shl = shl i32 %x, %conv
+  %sub = sub i32 0, %conv
+  %shr = lshr i32 %x, %sub
+  %or = or i32 %shl, %shr
+  ret i32 %or
+}
+
+define i64 @rotateleft_64_use_and(i64 %x, i32 %y) {
+; CHECK-LABEL: @rotateleft_64_use_and(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[Y:%.*]], 63
+; CHECK-NEXT:    [[Z:%.*]] = zext nneg i32 [[AND]] to i64
+; CHECK-NEXT:    call void @use(i64 [[Z]])
+; CHECK-NEXT:    [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
+; CHECK-NEXT:    ret i64 [[OR]]
+;
+  %and = and i32 %y, 63
+  %z = zext i32 %and to i64
+  call void @use(i64 %z)
+  %neg = sub nsw i32 0, %y
+  %and2 = and i32 %neg, 63
+  %conv = zext i32 %and2 to i64
+  %shl = shl i64 %x, %conv
+  %shr = lshr i64 %x, %z
+  %or = or i64 %shr, %shl
+  ret i64 %or
+}
+
+declare void @use(i32)
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 7b3a4ce365453..a5687b92e8310 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -635,6 +635,39 @@ TEST_F(PatternMatchTest, ZExtSExtSelf) {
   EXPECT_TRUE(m_ZExtOrSExtOrSelf(m_One()).match(One64S));
 }
 
+TEST_F(PatternMatchTest, TruncZextSelf) {
+  LLVMContext &Ctx = IRB.getContext();
+
+  Value *One32 = IRB.getInt32(1);
+  Value *One64 = IRB.getInt64(1);
+  Value *One32T = IRB.CreateTrunc(One64, IRB.getInt32Ty());
+  Value *One64Z = IRB.CreateZExt(One32, IntegerType::getInt64Ty(Ctx));
+  Value *One64S = IRB.CreateSExt(One32, IntegerType::getInt64Ty(Ctx));
+
+  EXPECT_TRUE(m_One().match(One32));
+  EXPECT_TRUE(m_One().match(One64));
+  EXPECT_FALSE(m_One().match(One32T));
+  EXPECT_FALSE(m_One().match(One64Z));
+  EXPECT_FALSE(m_One().match(One64S));
+
+  EXPECT_FALSE(m_Trunc(m_One()).match(One32));
+  EXPECT_TRUE(m_Trunc(m_One()).match(One32T));
+  EXPECT_FALSE(m_Trunc(m_One()).match(One64Z));
+  EXPECT_FALSE(m_Trunc(m_One()).match(One64S));
+
+  EXPECT_FALSE(m_ZExt(m_One()).match(One32));
+  EXPECT_FALSE(m_ZExt(m_One()).match(One64));
+  EXPECT_FALSE(m_ZExt(m_One()).match(One32T));
+  EXPECT_TRUE(m_ZExt(m_One()).match(One64Z));
+  EXPECT_FALSE(m_ZExt(m_One()).match(One64S));
+
+  EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32));
+  EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64));
+  EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32T));
+  EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64Z));
+  EXPECT_FALSE(m_TruncOrZExtOrSelf(m_One()).match(One64S));
+}
+
 TEST_F(PatternMatchTest, BitCast) {
   Value *OneDouble = ConstantFP::get(IRB.getDoubleTy(), APFloat(1.0));
   Value *ScalableDouble = ConstantFP::get(

From 1cab9ecd75aa121f1b21ac69249fc45b49be159b Mon Sep 17 00:00:00 2001
From: sallto <thomas at saller.com.de>
Date: Tue, 3 Jun 2025 15:12:41 +0200
Subject: [PATCH 2/2] fix typo in test case

---
 llvm/test/Transforms/InstCombine/rotate.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 3c416699f8dd7..0c9aac8f55170 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -749,11 +749,11 @@ define i32 @rotateright_32_trunc_neg_mask_amount(i32 %x, i64 %y) {
 
 define i32 @rotateleft_32_restricted_shamt(i32 %x, i32 %shAmt) {
 ; CHECK-LABEL: @rotateleft_32_restricted_shamt(
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 30
-; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 [[AND]])
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHAMT:%.*]], 30
+; CHECK-NEXT:    [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[AND]])
 ; CHECK-NEXT:    ret i32 [[OR]]
 ;
-  %and = and i32 %x, 30
+  %and = and i32 %shAmt, 30
   %shl = shl i32 %x, %and
   %sub = sub i32 0, %and
   %shr = lshr i32 %x, %sub


