[llvm] [InstCombine] Fold shuffles through all trivially vectorizable intrinsics (PR #141979)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 09:49:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
This addresses a TODO in foldShuffledIntrinsicOperands to use isTriviallyVectorizable instead of a hardcoded list of intrinsics, which in turn allows more intrinsics to be scalarized by VectorCombine.
From what I can tell, every intrinsic here should be speculatable, so an assertion was added.
Because this enables intrinsics like abs which have a scalar operand, we need to also check isVectorIntrinsicWithScalarOpAtArg.
---
Full diff: https://github.com/llvm/llvm-project/pull/141979.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+22-19)
- (modified) llvm/test/Transforms/InstCombine/abs-1.ll (+11)
- (modified) llvm/test/Transforms/InstCombine/fma.ll (+13)
- (modified) llvm/test/Transforms/InstCombine/sqrt.ll (+11)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e101edf4a6208..5eb466f6c6df1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1401,26 +1401,25 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
/// try to shuffle after the intrinsic.
Instruction *
InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
- // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
- // etc. Use llvm::isTriviallyVectorizable() and related to determine
- // which intrinsics are safe to shuffle?
- switch (II->getIntrinsicID()) {
- case Intrinsic::smax:
- case Intrinsic::smin:
- case Intrinsic::umax:
- case Intrinsic::umin:
- case Intrinsic::fma:
- case Intrinsic::fshl:
- case Intrinsic::fshr:
- break;
- default:
+ if (!isTriviallyVectorizable(II->getIntrinsicID()))
+ return nullptr;
+
+ assert(isSafeToSpeculativelyExecute(II) &&
+ "Trivially vectorizable but not safe to speculatively execute?");
+
+ // fabs is canonicalized to fabs (shuffle ...) in foldShuffleOfUnaryOps, so
+ // avoid undoing it.
+ if (match(II, m_FAbs(m_Value())))
return nullptr;
- }
Value *X;
Constant *C;
ArrayRef<int> Mask;
- auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
+ auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
+ return isa<Constant>(Arg.get()) ||
+ isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+ Arg.getOperandNo(), nullptr);
+ });
if (!NonConstArg ||
!match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
return nullptr;
@@ -1432,11 +1431,15 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
// See if all arguments are shuffled with the same mask.
SmallVector<Value *, 4> NewArgs;
Type *SrcTy = X->getType();
- for (Value *Arg : II->args()) {
- if (match(Arg, m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
- X->getType() == SrcTy)
+ for (Use &Arg : II->args()) {
+ if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
+ Arg.getOperandNo(), nullptr))
+ NewArgs.push_back(Arg);
+ else if (match(&Arg,
+ m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
+ X->getType() == SrcTy)
NewArgs.push_back(X);
- else if (match(Arg, m_ImmConstant(C))) {
+ else if (match(&Arg, m_ImmConstant(C))) {
// If it's a constant, try find the constant that would be shuffled to C.
if (Constant *ShuffledC =
unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll
index 7037647d116ba..fd67fc3421498 100644
--- a/llvm/test/Transforms/InstCombine/abs-1.ll
+++ b/llvm/test/Transforms/InstCombine/abs-1.ll
@@ -978,3 +978,14 @@ define i32 @abs_diff_signed_slt_no_nsw_swap(i32 %a, i32 %b) {
%cond = select i1 %cmp, i32 %sub_ba, i32 %sub_ab
ret i32 %cond
}
+
+define <2 x i32> @abs_unary_shuffle_ops(<2 x i32> %x) {
+; CHECK-LABEL: @abs_unary_shuffle_ops(
+; CHECK-NEXT: [[R2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[R1:%.*]], i1 false)
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[R2]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x i32> @llvm.abs(<2 x i32> %a, i1 false)
+ ret <2 x i32> %r
+}
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index f0d4f776a5d90..e3d3e722bcc23 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -972,6 +972,19 @@ define <2 x half> @fma_negone_vec_partial_undef(<2 x half> %x, <2 x half> %y) {
ret <2 x half> %sub
}
+define <2 x float> @fmuladd_unary_shuffle_ops(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
+; CHECK-LABEL: @fmuladd_unary_shuffle_ops(
+; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <2 x float> [[C:%.*]])
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[R1]]
+;
+ %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %c = shufflevector <2 x float> %z, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x float> @llvm.fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c)
+ ret <2 x float> %r
+}
+
; negative tests
define half @fma_non_negone(half %x, half %y) {
diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
index 0f4db3b3a65ae..2fda5bc37d023 100644
--- a/llvm/test/Transforms/InstCombine/sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
@@ -201,6 +201,17 @@ define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
ret <2 x float> %res
}
+define <2 x float> @sqrt_unary_shuffle_ops(<2 x float> %x) {
+; CHECK-LABEL: @sqrt_unary_shuffle_ops(
+; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[A:%.*]])
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[R1]]
+;
+ %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x float> @llvm.sqrt(<2 x float> %a)
+ ret <2 x float> %r
+}
+
declare i32 @foo(double)
declare double @sqrt(double) readnone
declare float @sqrtf(float)
``````````
</details>
https://github.com/llvm/llvm-project/pull/141979
More information about the llvm-commits mailing list