[llvm] [VectorCombine] Add special handling for truncating shuffles (PR #70013)
Nabeel Omer via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 01:29:10 PDT 2023
https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/70013
>From ae8360674dcfba05a5391bb9cf7be8a408fcf200 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 23 Oct 2023 22:42:38 +0100
Subject: [PATCH 1/2] [VectorCombine] Add special handling for truncating
shuffles
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 12 ++++++++----
.../X86/reduction-truncating-vecs.ll | 17 +++++++++++++++++
2 files changed, 25 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 16efc3b2336f2a5..943ff52bf3c1bd2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1472,21 +1472,25 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
if (!ShuffleInputType)
return false;
- int NumInputElts = ShuffleInputType->getNumElements();
+ unsigned int NumInputElts = ShuffleInputType->getNumElements();
// Find the mask from sorting the lanes into order. This is most likely to
// become a identity or concat mask. Undef elements are pushed to the end.
SmallVector<int> ConcatMask;
Shuffle->getShuffleMask(ConcatMask);
sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
+ // In the case of a truncating shuffle it's possible for the mask
+ // to have an index greater than the size of the resulting vector.
+ // This requires special handling.
+ bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
bool UsesSecondVec =
- any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
+ any_of(ConcatMask, [&](int M) { return M >= (int) NumInputElts; });
InstructionCost OldCost = TTI.getShuffleCost(
UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
- UsesSecondVec ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
+ (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
InstructionCost NewCost = TTI.getShuffleCost(
UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
- UsesSecondVec ? VecType : ShuffleInputType, ConcatMask);
+ (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, ConcatMask);
LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
<< "\n");
diff --git a/llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll b/llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll
new file mode 100644
index 000000000000000..4b429b30a7f5e59
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S --passes=vector-combine -mtriple=x86_64-sie-ps5 < %s | FileCheck %s
+
+define i16 @test_spill_mixed() {
+; CHECK-LABEL: define i16 @test_spill_mixed() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 9>
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
+; CHECK-NEXT: ret i16 0
+;
+entry:
+ %0 = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 9>
+ %1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
+ ret i16 0
+}
+
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
>From 90fa05baee4f3a8ae470fd6aa1d55ba332b2a75c Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Tue, 24 Oct 2023 09:28:45 +0100
Subject: [PATCH 2/2] Fix formatting
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 943ff52bf3c1bd2..0cfbd0f060c966f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1484,13 +1484,15 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
// This requires special handling.
bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
bool UsesSecondVec =
- any_of(ConcatMask, [&](int M) { return M >= (int) NumInputElts; });
+ any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
InstructionCost OldCost = TTI.getShuffleCost(
UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
- (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
+ (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType,
+ Shuffle->getShuffleMask());
InstructionCost NewCost = TTI.getShuffleCost(
UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
- (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, ConcatMask);
+ (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType,
+ ConcatMask);
LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
<< "\n");
More information about the llvm-commits mailing list