[llvm] [VectorCombine] Add foldShuffleOfIntrinsics. (PR #106502)
Han-Kuan Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 00:34:25 PDT 2024
https://github.com/HanKuanChen updated https://github.com/llvm/llvm-project/pull/106502
>From d8f1ab5bb1382d186a1778fe15bd141f970ebac3 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 28 Aug 2024 23:53:01 -0700
Subject: [PATCH 1/2] [VectorCombine] Pre-commit test.
---
.../VectorCombine/foldShuffleOfIntrinsics.ll | 34 +++++++++++++++++++
1 file changed, 34 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll
diff --git a/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll b/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll
new file mode 100644
index 00000000000000..ba45f24929c71e
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=vector-combine -S %s | FileCheck %s
+
+; Two llvm.abs calls that pass the same i1 argument (false) feed a shuffle
+; whose mask <0..7> concatenates their <4 x i32> results into one <8 x i32>.
+define <8 x i32> @test1(<4 x i32> %0, <4 x i32> %1) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP1:%.*]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i32> [[TMP4]]
+;
+entry:
+ %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %0, i1 false)
+ %3 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1, i1 false)
+ %4 = shufflevector <4 x i32> %2, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %4
+}
+
+; Two llvm.abs calls that pass different i1 arguments (true for the first,
+; false for the second) feed a shuffle whose mask <0..7> concatenates their
+; <4 x i32> results into one <8 x i32>.
+define <8 x i32> @test2(<4 x i32> %0, <4 x i32> %1) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP0:%.*]], i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP1:%.*]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x i32> [[TMP4]]
+;
+entry:
+ %2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %0, i1 true)
+ %3 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %1, i1 false)
+ %4 = shufflevector <4 x i32> %2, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %4
+}
+
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
>From 874963226704580f69773745b4356c6227f652d9 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen at sifive.com>
Date: Wed, 28 Aug 2024 23:53:32 -0700
Subject: [PATCH 2/2] [VectorCombine] Add foldShuffleOfIntrinsics.
---
.../Transforms/Vectorize/VectorCombine.cpp | 94 +++++++++++++++++++
.../VectorCombine/foldShuffleOfIntrinsics.ll | 7 +-
2 files changed, 97 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 99bd383ab0dead..9ad7276ffb9707 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -115,6 +115,7 @@ class VectorCombine {
bool foldShuffleOfBinops(Instruction &I);
bool foldShuffleOfCastops(Instruction &I);
bool foldShuffleOfShuffles(Instruction &I);
+ bool foldShuffleOfIntrinsics(Instruction &I);
bool foldShuffleToIdentity(Instruction &I);
bool foldShuffleFromReductions(Instruction &I);
bool foldCastFromReductions(Instruction &I);
@@ -1673,6 +1674,98 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
return true;
}
+/// Try to convert
+/// "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)".
+///
+/// Only llvm.abs is handled, and only when both calls agree on their second
+/// (i1) argument. Both shuffle operands must be single-use calls to the same
+/// intrinsic, the shuffle result and the intrinsic result must be fixed-width
+/// vectors, and the rewrite is skipped when the cost model rates the new
+/// sequence as more expensive than the old one.
+///
+/// \param I the instruction to inspect; it is left untouched unless it matches
+///          the shuffle pattern described above.
+/// \returns true if \p I was replaced by the new intrinsic call.
+bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
+  Value *V0, *V1;
+  ArrayRef<int> OldMask;
+  // Both shuffle operands must have the shuffle as their only user.
+  if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
+                           m_Mask(OldMask))))
+    return false;
+
+  auto *II0 = dyn_cast<IntrinsicInst>(V0);
+  auto *II1 = dyn_cast<IntrinsicInst>(V1);
+  if (!II0 || !II1)
+    return false;
+
+  // Both operands must be calls to the same intrinsic.
+  Intrinsic::ID IID = II0->getIntrinsicID();
+  if (IID != II1->getIntrinsicID())
+    return false;
+
+  auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *II0Ty = dyn_cast<FixedVectorType>(II0->getType());
+  if (!ShuffleDstTy || !II0Ty)
+    return false;
+
+  switch (IID) {
+  case Intrinsic::abs: {
+    // The merged call reuses II0's second argument, so II1 must carry the
+    // same value for the rewrite to describe both original calls.
+    if (cast<Constant>(II0->getArgOperand(1))->isOneValue() !=
+        cast<Constant>(II1->getArgOperand(1))->isOneValue())
+      return false;
+    break;
+  }
+  default:
+    return false;
+  }
+
+  // Collect the call arguments of both intrinsics. The index is deliberately
+  // not named 'I' so that it does not shadow the instruction parameter, which
+  // is used again below.
+  SmallVector<Value *> Args0;
+  SmallVector<Value *> Args1;
+  for (unsigned ArgIdx = 0, NumArgs = II0->arg_size(); ArgIdx != NumArgs;
+       ++ArgIdx) {
+    Args0.push_back(II0->getArgOperand(ArgIdx));
+    Args1.push_back(II1->getArgOperand(ArgIdx));
+  }
+
+  // Old sequence: two intrinsic calls followed by the shuffle of their results.
+  IntrinsicCostAttributes Attr0(IID, II0Ty, Args0);
+  IntrinsicCostAttributes Attr1(IID, II1->getType(), Args1);
+  InstructionCost OldCost =
+      TTI.getIntrinsicInstrCost(Attr0, TTI::TCK_RecipThroughput) +
+      TTI.getIntrinsicInstrCost(Attr1, TTI::TCK_RecipThroughput) +
+      TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, II0Ty, OldMask,
+                         TTI::TCK_RecipThroughput, 0, nullptr, {II0, II1}, &I);
+
+  // New sequence: one shuffle of the intrinsic inputs followed by a single
+  // intrinsic call on the shuffle's result type.
+  InstructionCost NewCost;
+  switch (IID) {
+  case Intrinsic::abs: {
+    IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy,
+                                    {ShuffleDstTy, Builder.getInt1Ty()});
+    NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, II0Ty,
+                                 OldMask, TTI::TCK_RecipThroughput) +
+              TTI.getIntrinsicInstrCost(NewAttr, TTI::TCK_RecipThroughput);
+    break;
+  }
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+
+  LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I
+                    << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
+                    << "\n");
+
+  // Equal cost still folds; only a strictly more expensive result is rejected.
+  if (NewCost > OldCost)
+    return false;
+
+  Value *NewIntrinsic;
+  switch (IID) {
+  case Intrinsic::abs: {
+    // Shuffle the first arguments with the original mask, then apply the
+    // intrinsic once to the shuffled vector.
+    Value *Shuf = Builder.CreateShuffleVector(Args0[0], Args1[0], OldMask);
+    NewIntrinsic = Builder.CreateIntrinsic(
+        ShuffleDstTy, IID, {Shuf, cast<Constant>(II0->getArgOperand(1))});
+    Worklist.pushValue(Shuf);
+    break;
+  }
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
+
+  // Intersect flags from the old intrinsics.
+  if (auto *NewInst = dyn_cast<Instruction>(NewIntrinsic)) {
+    NewInst->copyIRFlags(II0);
+    NewInst->andIRFlags(II1);
+  }
+
+  replaceValue(I, *NewIntrinsic);
+  return true;
+}
+
using InstLane = std::pair<Use *, int>;
static InstLane lookThroughShuffles(Use *U, int Lane) {
@@ -2554,6 +2647,7 @@ bool VectorCombine::run() {
MadeChange |= foldShuffleOfBinops(I);
MadeChange |= foldShuffleOfCastops(I);
MadeChange |= foldShuffleOfShuffles(I);
+ MadeChange |= foldShuffleOfIntrinsics(I);
MadeChange |= foldSelectShuffle(I);
MadeChange |= foldShuffleToIdentity(I);
break;
diff --git a/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll b/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll
index ba45f24929c71e..32a11a7d813b28 100644
--- a/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll
+++ b/llvm/test/Transforms/VectorCombine/foldShuffleOfIntrinsics.ll
@@ -4,10 +4,9 @@
define <8 x i32> @test1(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP0:%.*]], i1 false)
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP1:%.*]], i1 false)
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: ret <8 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.abs.v8i32(<8 x i32> [[TMP2]], i1 false)
+; CHECK-NEXT: ret <8 x i32> [[TMP3]]
;
entry:
%2 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %0, i1 false)
More information about the llvm-commits
mailing list