[llvm] [InstCombine] Fold rotate patterns with ZExt/Trunc at different Points (PR #142578)
Thomas Saller via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 06:15:10 PDT 2025
https://github.com/sallto updated https://github.com/llvm/llvm-project/pull/142578
From d10b209daf7bed9e6832b6374459b05982850f57 Mon Sep 17 00:00:00 2001
From: sallto <thomas at saller.com.de>
Date: Mon, 19 May 2025 17:19:14 +0200
Subject: [PATCH 1/2] [InstCombine] Fold rotate patterns with ZExt/Trunc
Rotate patterns now fold when the shift amount is truncated, or when width
changes (ZExt/Trunc) happen at different points on the left and right sides.

Alive2 proof: https://alive2.llvm.org/ce/z/RkALLB

Fixes #138334
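As a rough sketch of the kind of pattern this enables (a condensed copy of the
new rotateright_64_zext_double_conversion test added below; the @rotr_example
name is just illustrative), the left shift amount is negated and masked in i32
and only then extended, while the right shift amount is extended directly; the
whole expression now folds to a funnel-shift intrinsic:

  define i64 @rotr_example(i64 %x, i32 %y) {
    %z = zext i32 %y to i64        ; right (lshr) amount, extended directly
    %neg = sub nsw i32 0, %y       ; left (shl) amount is negated in i32 ...
    %and2 = and i32 %neg, 63       ; ... masked to the rotate width ...
    %conv = zext i32 %and2 to i64  ; ... and only then extended
    %shl = shl i64 %x, %conv
    %shr = lshr i64 %x, %z
    %or = or i64 %shr, %shl        ; now folds to @llvm.fshr.i64(%x, %x, %z)
    ret i64 %or
  }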
---
llvm/include/llvm/IR/PatternMatch.h | 7 +
.../InstCombine/InstCombineAndOrXor.cpp | 58 +++++---
llvm/test/Transforms/InstCombine/rotate.ll | 137 ++++++++++++++++++
llvm/unittests/IR/PatternMatch.cpp | 33 +++++
4 files changed, 217 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 6242a686e7bc0..b3061e6d4ccdc 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2160,6 +2160,13 @@ m_ZExtOrSelf(const OpTy &Op) {
return m_CombineOr(m_ZExt(Op), Op);
}
+template <typename OpTy>
+inline match_combine_or<CastInst_match<OpTy, TruncInst>,
+ match_combine_or<CastInst_match<OpTy, ZExtInst>, OpTy>>
+m_TruncOrZExtOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_Trunc(Op), m_ZExtOrSelf(Op));
+}
+
template <typename OpTy>
inline match_combine_or<CastInst_match<OpTy, SExtInst>, OpTy>
m_SExtOrSelf(const OpTy &Op) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 59b46ebdb72e2..65614d0c23720 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2969,31 +2969,53 @@ InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
if (!isPowerOf2_32(Width))
return nullptr;
- // The shift amount may be masked with negation:
- // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ // Check that L and R operate on the same value X. Since the bitwidth of X
+ // can differ from that of L and R, there are multiple possible locations for
+ // the ZExt or Trunc.
Value *X;
+ const APInt *LMask = nullptr;
+ const APInt *RMask = nullptr;
unsigned Mask = Width - 1;
- if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
- match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
- return X;
+ // L is essentially a no-op except for changing the type of X.
+ // There are multiple possible patterns, such as X & LMask or a ZExt/Trunc of X.
+ match(L, m_TruncOrZExtOrSelf(m_CombineOr(
+ m_And(m_TruncOrZExtOrSelf(m_Value(X)), m_APInt(LMask)),
+ m_Value(X))));
+
+ // R should be -X; sometimes (-X) & RMask is used, which is equivalent if
+ // RMask >= BitWidth - 1.
+ const Value *ValueToNegate = nullptr;
+ if (!match(R, m_TruncOrZExtOrSelf(m_CombineOr(
+ m_And(m_Neg(m_Value(ValueToNegate)), m_APInt(RMask)),
+ m_Neg(m_Value(ValueToNegate))))) ||
+ (RMask && RMask->ult(Mask)))
+ return nullptr;
- // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
- if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
- return L;
+ // ValueToNegate can be L if the rotate uses a bitwise-and on the shift
+ // amount before the rotate pattern.
+ if (!match(ValueToNegate, m_TruncOrZExtOrSelf(
+ m_CombineOr(m_Specific(X), m_Specific(L)))))
+ return nullptr;
- // Similar to above, but the shift amount may be extended after masking,
- // so return the extended value as the parameter for the intrinsic.
- if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
- match(R,
- m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
- m_SpecificInt(Mask))))
+ // L is a no-op and is guaranteed to have the same type as the rotate, so
+ // we reuse the existing ZExt/Trunc.
+ if (!LMask)
return L;
- if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
- match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
- return L;
+ // We can still fold with an LMask < Mask if R solely depends on L (not on
+ // X directly).
+ if (LMask->ult(Mask))
+ return (match(ValueToNegate, m_TruncOrZExtOrSelf(m_Specific(L))))
+ ? L
+ : nullptr;
- return nullptr;
+ // X has the same width as L and LMask >= BitWidth - 1, so L is a no-op.
+ Value *matchedX;
+ if (match(L, m_And(m_Value(matchedX), m_Value())))
+ return matchedX;
+
+ // L is ZExt(And(...)), so we can't reuse the ZExt/Trunc.
+ return L;
};
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index a4d4ec375954f..3c416699f8dd7 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -698,6 +698,104 @@ define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
ret i64 %10
}
+define i64 @rotateright_64_zext_double_conversion(i64 %x, i32 %y) {
+; CHECK-LABEL: @rotateright_64_zext_double_conversion(
+; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %z = zext i32 %y to i64
+ %neg = sub nsw i32 0, %y
+ %and2 = and i32 %neg, 63
+ %conv = zext i32 %and2 to i64
+ %shl = shl i64 %x, %conv
+ %shr = lshr i64 %x, %z
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+define i32 @rotateright_32_trunc_early(i32 %x, i64 %y) {
+; CHECK-LABEL: @rotateright_32_trunc_early(
+; CHECK-NEXT: [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %z = trunc i64 %y to i32
+ %neg = sub nsw i32 0, %z
+ %and2 = and i32 %neg, 31
+ %shl = shl i32 %x, %and2
+ %shr = lshr i32 %x, %z
+ %or = or i32 %shr, %shl
+ ret i32 %or
+}
+
+define i32 @rotateright_32_trunc_neg_mask_amount(i32 %x, i64 %y) {
+; CHECK-LABEL: @rotateright_32_trunc_neg_mask_amount(
+; CHECK-NEXT: [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %z = trunc i64 %y to i32
+ %neg = sub i64 0, %y
+ %and2 = and i64 %neg, 31
+ %conv = trunc i64 %and2 to i32
+ %shl = shl i32 %x, %conv
+ %shr = lshr i32 %x, %z
+ %or = or i32 %shr, %shl
+ ret i32 %or
+}
+
+; restrict the shift amount before rotating
+
+define i32 @rotateleft_32_restricted_shamt(i32 %x, i32 %shAmt) {
+; CHECK-LABEL: @rotateleft_32_restricted_shamt(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 30
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 [[AND]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %x, 30
+ %shl = shl i32 %x, %and
+ %sub = sub i32 0, %and
+ %shr = lshr i32 %x, %sub
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+; unnecessarily large and-masks
+
+define i32 @rotateleft_32_non_restricted_shamt(i32 %x, i32 %t) {
+; CHECK-LABEL: @rotateleft_32_non_restricted_shamt(
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[T:%.*]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %t, 31
+ %shl = shl i32 %x, %and
+ %sub = sub nsw i32 0, %and
+ %and2 = and i32 %sub, 31
+ %shr = lshr i32 %x, %and2
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+; negative test - the right-hand and-mask is too small (should be >= 31)
+
+define i32 @rotateleft_32_incorrect_right_mask(i32 %x, i32 %t) {
+; CHECK-LABEL: @rotateleft_32_incorrect_right_mask(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[T:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[T]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], 30
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[AND]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shl = shl i32 %x, %t
+ %sub = sub nsw i32 0, %t
+ %and = and i32 %sub, 30
+ %shr = lshr i32 %x, %and
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
@@ -1086,3 +1184,42 @@ define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
%r = add i32 %shr, %shl
ret i32 %r
}
+
+; multi-use tests
+define i32 @rotateleft_32_use_zext(i32 %x, i16 %shAmt) {
+; CHECK-LABEL: @rotateleft_32_use_zext(
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[SHAMT:%.*]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[CONV]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %conv = zext i16 %shAmt to i32
+ call void @use(i32 %conv)
+ %shl = shl i32 %x, %conv
+ %sub = sub i32 0, %conv
+ %shr = lshr i32 %x, %sub
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+define i64 @rotateleft_64_use_and(i64 %x, i32 %y) {
+; CHECK-LABEL: @rotateleft_64_use_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], 63
+; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[AND]] to i64
+; CHECK-NEXT: call void @use(i64 [[Z]])
+; CHECK-NEXT: [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %and = and i32 %y, 63
+ %z = zext i32 %and to i64
+ call void @use(i64 %z)
+ %neg = sub nsw i32 0, %y
+ %and2 = and i32 %neg, 63
+ %conv = zext i32 %and2 to i64
+ %shl = shl i64 %x, %conv
+ %shr = lshr i64 %x, %z
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+declare void @use(i32)
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 7b3a4ce365453..a5687b92e8310 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -635,6 +635,39 @@ TEST_F(PatternMatchTest, ZExtSExtSelf) {
EXPECT_TRUE(m_ZExtOrSExtOrSelf(m_One()).match(One64S));
}
+TEST_F(PatternMatchTest, TruncZextSelf) {
+ LLVMContext &Ctx = IRB.getContext();
+
+ Value *One32 = IRB.getInt32(1);
+ Value *One64 = IRB.getInt64(1);
+ Value *One32T = IRB.CreateTrunc(One64, IRB.getInt32Ty());
+ Value *One64Z = IRB.CreateZExt(One32, IntegerType::getInt64Ty(Ctx));
+ Value *One64S = IRB.CreateSExt(One32, IntegerType::getInt64Ty(Ctx));
+
+ EXPECT_TRUE(m_One().match(One32));
+ EXPECT_TRUE(m_One().match(One64));
+ EXPECT_FALSE(m_One().match(One32T));
+ EXPECT_FALSE(m_One().match(One64Z));
+ EXPECT_FALSE(m_One().match(One64S));
+
+ EXPECT_FALSE(m_Trunc(m_One()).match(One32));
+ EXPECT_TRUE(m_Trunc(m_One()).match(One32T));
+ EXPECT_FALSE(m_Trunc(m_One()).match(One64Z));
+ EXPECT_FALSE(m_Trunc(m_One()).match(One64S));
+
+ EXPECT_FALSE(m_ZExt(m_One()).match(One32));
+ EXPECT_FALSE(m_ZExt(m_One()).match(One64));
+ EXPECT_FALSE(m_ZExt(m_One()).match(One32T));
+ EXPECT_TRUE(m_ZExt(m_One()).match(One64Z));
+ EXPECT_FALSE(m_ZExt(m_One()).match(One64S));
+
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32));
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64));
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32T));
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64Z));
+ EXPECT_FALSE(m_TruncOrZExtOrSelf(m_One()).match(One64S));
+}
+
TEST_F(PatternMatchTest, BitCast) {
Value *OneDouble = ConstantFP::get(IRB.getDoubleTy(), APFloat(1.0));
Value *ScalableDouble = ConstantFP::get(
From 1cab9ecd75aa121f1b21ac69249fc45b49be159b Mon Sep 17 00:00:00 2001
From: sallto <thomas at saller.com.de>
Date: Tue, 3 Jun 2025 15:12:41 +0200
Subject: [PATCH 2/2] fix typo in test case
---
llvm/test/Transforms/InstCombine/rotate.ll | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 3c416699f8dd7..0c9aac8f55170 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -749,11 +749,11 @@ define i32 @rotateright_32_trunc_neg_mask_amount(i32 %x, i64 %y) {
define i32 @rotateleft_32_restricted_shamt(i32 %x, i32 %shAmt) {
; CHECK-LABEL: @rotateleft_32_restricted_shamt(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 30
-; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 [[AND]])
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHAMT:%.*]], 30
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[AND]])
; CHECK-NEXT: ret i32 [[OR]]
;
- %and = and i32 %x, 30
+ %and = and i32 %shAmt, 30
%shl = shl i32 %x, %and
%sub = sub i32 0, %and
%shr = lshr i32 %x, %sub