[llvm] [VectorCombine] Add special handling for truncating shuffles (PR #70013)

Tue Oct 24 01:29:10 PDT 2023

https://github.com/omern1 updated https://github.com/llvm/llvm-project/pull/70013

>From ae8360674dcfba05a5391bb9cf7be8a408fcf200 Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Mon, 23 Oct 2023 22:42:38 +0100
Subject: [PATCH 1/2] [VectorCombine] Add special handling for truncating
 shuffles

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 12 ++++++++----
 .../X86/reduction-truncating-vecs.ll            | 17 +++++++++++++++++
 2 files changed, 25 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 16efc3b2336f2a5..943ff52bf3c1bd2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1472,21 +1472,25 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
       dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
   if (!ShuffleInputType)
     return false;
-  int NumInputElts = ShuffleInputType->getNumElements();
+  unsigned int NumInputElts = ShuffleInputType->getNumElements();
 
   // Find the mask from sorting the lanes into order. This is most likely to
   // become a identity or concat mask. Undef elements are pushed to the end.
   SmallVector<int> ConcatMask;
   Shuffle->getShuffleMask(ConcatMask);
   sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
+  // In the case of a truncating shuffle it's possible for the mask
+  // to have an index greater than the size of the resulting vector.
+  // This requires special handling.
+  bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
   bool UsesSecondVec =
-      any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
+      any_of(ConcatMask, [&](int M) { return M >= (int) NumInputElts; });
   InstructionCost OldCost = TTI.getShuffleCost(
       UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
-      UsesSecondVec ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
+      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
   InstructionCost NewCost = TTI.getShuffleCost(
       UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
-      UsesSecondVec ? VecType : ShuffleInputType, ConcatMask);
+      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, ConcatMask);
 
   LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                     << "\n");
diff --git a/llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll b/llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll
new file mode 100644
index 000000000000000..4b429b30a7f5e59
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/reduction-truncating-vecs.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -S --passes=vector-combine -mtriple=x86_64-sie-ps5 < %s | FileCheck %s
+
+define i16 @test_spill_mixed() {
+; CHECK-LABEL: define i16 @test_spill_mixed() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 9>
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
+; CHECK-NEXT:    ret i16 0
+;
+entry:
+  %0 = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 9>
+  %1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %0)
+  ret i16 0
+}
+
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

>From 90fa05baee4f3a8ae470fd6aa1d55ba332b2a75c Mon Sep 17 00:00:00 2001
From: Nabeel Omer <Nabeel.Omer at sony.com>
Date: Tue, 24 Oct 2023 09:28:45 +0100
Subject: [PATCH 2/2] Fix formatting

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 943ff52bf3c1bd2..0cfbd0f060c966f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1484,13 +1484,15 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
   // This requires special handling.
   bool IsTruncatingShuffle = VecType->getNumElements() < NumInputElts;
   bool UsesSecondVec =
-      any_of(ConcatMask, [&](int M) { return M >= (int) NumInputElts; });
+      any_of(ConcatMask, [&](int M) { return M >= (int)NumInputElts; });
   InstructionCost OldCost = TTI.getShuffleCost(
       UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
-      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, Shuffle->getShuffleMask());
+      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType,
+      Shuffle->getShuffleMask());
   InstructionCost NewCost = TTI.getShuffleCost(
       UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc,
-      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType, ConcatMask);
+      (UsesSecondVec && !IsTruncatingShuffle) ? VecType : ShuffleInputType,
+      ConcatMask);
 
   LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
                     << "\n");