[llvm] [VectorCombine] New folding pattern for extract/binop/shuffle chains (PR #145232)

Rajveer Singh Bharadwaj via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 25 05:57:45 PDT 2025


https://github.com/Rajveer100 updated https://github.com/llvm/llvm-project/pull/145232

>From d130a707aa74741a6a101508212ab2afc2942f5c Mon Sep 17 00:00:00 2001
From: Rajveer <rajveer.developer at icloud.com>
Date: Sun, 22 Jun 2025 17:39:34 +0530
Subject: [PATCH] [VectorCombine] New folding pattern for extract/binop/shuffle
 chains

Resolves #144654
Part of #143088

This adds a new `foldShuffleChainsToReduce` for horizontal reduction of
patterns like:

```llvm
define i16 @test_reduce_v8i16(<8 x i16> %a0) local_unnamed_addr #0 {
  %1 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
  %2 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a0, <8 x i16> %1)
  %3 = shufflevector <8 x i16> %2, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %4 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %2, <8 x i16> %3)
  %5 = shufflevector <8 x i16> %4, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %6 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %4, <8 x i16> %5)
  %7 = extractelement <8 x i16> %6, i64 0
  ret i16 %7
}
```
...which can be reduced to a llvm.vector.reduce.umin.v8i16(%a0) intrinsic call.

Similar transformation for other ops when costs permit to do so.
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 169 +++++++++++++++
 .../X86/shuffle-chain-reduction-umin.ll       | 203 ++++++++++++++++++
 .../fold-shuffle-chains-to-reduce.ll          |  18 ++
 3 files changed, 390 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-chain-reduction-umin.ll
 create mode 100644 llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 52cb1dbb33b86..127090c1d4d13 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -129,6 +129,7 @@ class VectorCombine {
   bool foldShuffleOfIntrinsics(Instruction &I);
   bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
+  bool foldShuffleChainsToReduce(Instruction &I);
   bool foldCastFromReductions(Instruction &I);
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
   bool foldInterleaveIntrinsics(Instruction &I);
@@ -2910,6 +2911,171 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
   return foldSelectShuffle(*Shuffle, true);
 }
 
+bool VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
+  auto *EEI = dyn_cast<ExtractElementInst>(&I);
+  if (!EEI)
+    return false;
+
+  std::queue<Value *> InstWorklist;
+  Value *InitEEV = nullptr;
+  Intrinsic::ID CommonOp = 0;
+
+  bool IsFirstEEInst = true, IsFirstCallInst = true;
+  bool ShouldBeCallInst = true;
+
+  SmallVector<Value *, 3> PrevVecV(3, nullptr);
+  int64_t ShuffleMaskHalf = -1, ExpectedShuffleMaskHalf = 1;
+  int64_t VecSize = -1;
+
+  InstWorklist.push(EEI);
+
+  while (!InstWorklist.empty()) {
+    Value *V = InstWorklist.front();
+    InstWorklist.pop();
+
+    auto *CI = dyn_cast<Instruction>(V);
+    if (!CI)
+      return false;
+
+    if (auto *EEInst = dyn_cast<ExtractElementInst>(CI)) {
+      if (!IsFirstEEInst)
+        return false;
+      IsFirstEEInst = false;
+
+      auto *VecOp = EEInst->getVectorOperand();
+      if (!VecOp)
+        return false;
+
+      auto *FVT = dyn_cast<FixedVectorType>(VecOp->getType());
+      if (!FVT)
+        return false;
+
+      VecSize = FVT->getNumElements();
+      if (VecSize < 2 || VecSize % 2 != 0)
+        return false;
+
+      auto *IndexOp = EEInst->getIndexOperand();
+      if (!IndexOp)
+        return false;
+
+      auto *ConstIndex = dyn_cast<ConstantInt>(IndexOp);
+      if (ConstIndex->getValue() != 0)
+        return false;
+
+      ShuffleMaskHalf = 1;
+      PrevVecV[2] = VecOp;
+      InitEEV = EEInst;
+      InstWorklist.push(PrevVecV[2]);
+    } else if (auto *CallI = dyn_cast<CallInst>(CI)) {
+      if (IsFirstEEInst || !ShouldBeCallInst || !PrevVecV[2])
+        return false;
+
+      if (!IsFirstCallInst &&
+          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
+        return false;
+
+      if (CallI != (IsFirstCallInst ? PrevVecV[2] : PrevVecV[0]))
+        return false;
+      IsFirstCallInst = false;
+
+      auto *II = dyn_cast<IntrinsicInst>(CallI);
+      if (!II)
+        return false;
+
+      if (!CommonOp)
+        CommonOp = II->getIntrinsicID();
+      if (II->getIntrinsicID() != CommonOp)
+        return false;
+
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::umin:
+      case Intrinsic::umax:
+      case Intrinsic::smin:
+      case Intrinsic::smax: {
+        auto *Op0 = CallI->getOperand(0);
+        auto *Op1 = CallI->getOperand(1);
+        PrevVecV[0] = Op0;
+        PrevVecV[1] = Op1;
+        break;
+      }
+      default:
+        return false;
+      }
+      ShouldBeCallInst ^= 1;
+
+      if (!isa<ShuffleVectorInst>(PrevVecV[1]))
+        std::swap(PrevVecV[0], PrevVecV[1]);
+      InstWorklist.push(PrevVecV[1]);
+      InstWorklist.push(PrevVecV[0]);
+    } else if (auto *SVInst = dyn_cast<ShuffleVectorInst>(CI)) {
+      if (IsFirstEEInst || ShouldBeCallInst ||
+          any_of(PrevVecV, [](Value *VecV) { return VecV == nullptr; }))
+        return false;
+
+      if (SVInst != PrevVecV[1])
+        return false;
+
+      auto *ShuffleVec = SVInst->getOperand(0);
+      if (!ShuffleVec || ShuffleVec != PrevVecV[0])
+        return false;
+
+      SmallVector<int> CurMask;
+      SVInst->getShuffleMask(CurMask);
+
+      if (ShuffleMaskHalf != ExpectedShuffleMaskHalf)
+        return false;
+      ExpectedShuffleMaskHalf *= 2;
+
+      for (int Mask = 0, MaskSize = CurMask.size(); Mask != MaskSize; ++Mask) {
+        if (Mask < ShuffleMaskHalf && CurMask[Mask] != ShuffleMaskHalf + Mask)
+          return false;
+        if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
+          return false;
+      }
+      ShuffleMaskHalf *= 2;
+      if (ExpectedShuffleMaskHalf == VecSize)
+        break;
+      ShouldBeCallInst ^= 1;
+    } else {
+      return false;
+    }
+  }
+
+  if (IsFirstEEInst || ShouldBeCallInst)
+    return false;
+
+  assert(VecSize != -1 && ExpectedShuffleMaskHalf == VecSize &&
+         "Expected Match for Vector Size and Mask Half");
+
+  Value *FinalVecV = PrevVecV[0];
+  if (!InitEEV || !FinalVecV)
+    return false;
+
+  Intrinsic::ID ReducedOp = 0;
+  switch (CommonOp) {
+  case Intrinsic::umin:
+    ReducedOp = Intrinsic::vector_reduce_umin;
+    break;
+  case Intrinsic::umax:
+    ReducedOp = Intrinsic::vector_reduce_umax;
+    break;
+  case Intrinsic::smin:
+    ReducedOp = Intrinsic::vector_reduce_smin;
+    break;
+  case Intrinsic::smax:
+    ReducedOp = Intrinsic::vector_reduce_smax;
+    break;
+  default:
+    return false;
+  }
+
+  auto *ReducedResult =
+      Builder.CreateIntrinsic(ReducedOp, {FinalVecV->getType()}, {FinalVecV});
+  replaceValue(*InitEEV, *ReducedResult);
+
+  return true;
+}
+
 /// Determine if its more efficient to fold:
 ///   reduce(trunc(x)) -> trunc(reduce(x)).
 ///   reduce(sext(x))  -> sext(reduce(x)).
@@ -3621,6 +3787,9 @@ bool VectorCombine::run() {
         MadeChange |= foldShuffleFromReductions(I);
         MadeChange |= foldCastFromReductions(I);
         break;
+      case Instruction::ExtractElement:
+        MadeChange |= foldShuffleChainsToReduce(I);
+        break;
       case Instruction::ICmp:
       case Instruction::FCmp:
         MadeChange |= foldExtractExtract(I);
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-chain-reduction-umin.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-chain-reduction-umin.ll
new file mode 100644
index 0000000000000..d1fd11fb0e6d3
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-chain-reduction-umin.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes=vector-combine -S %s | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes=vector-combine -S %s | FileCheck %s --check-prefixes=CHECK,SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes=vector-combine -S %s | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v4 -passes=vector-combine -S %s | FileCheck %s --check-prefixes=CHECK,AVX
+
+define i16 @test_reduce_v8i16(<8 x i16> %a0) local_unnamed_addr #0 {
+; CHECK-LABEL: define i16 @test_reduce_v8i16(
+; CHECK-SAME: <8 x i16> [[A0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A0]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a0, <8 x i16> %1)
+  %3 = shufflevector <8 x i16> %2, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %2, <8 x i16> %3)
+  %5 = shufflevector <8 x i16> %4, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %4, <8 x i16> %5)
+  %7 = extractelement <8 x i16> %6, i64 0
+  ret i16 %7
+}
+
+define i8 @test_reduce_v16i8(<16 x i8> %a0) local_unnamed_addr #0 {
+;
+; CHECK-LABEL: define i8 @test_reduce_v16i8(
+; CHECK-SAME: <16 x i8> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP8:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A0]])
+; CHECK-NEXT:    ret i8 [[TMP8]]
+;
+  %1 = shufflevector <16 x i8> %a0, <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a0, <16 x i8> %1)
+  %3 = shufflevector <16 x i8> %2, <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %2, <16 x i8> %3)
+  %5 = shufflevector <16 x i8> %4, <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %4, <16 x i8> %5)
+  %7 = shufflevector <16 x i8> %6, <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %6, <16 x i8> %7)
+  %9 = extractelement <16 x i8> %8, i64 0
+  ret i8 %9
+}
+
+define i8 @test_reduce_v32i8(<32 x i8> %a0) local_unnamed_addr #0 {
+; CHECK-LABEL: define i8 @test_reduce_v32i8(
+; CHECK-SAME: <32 x i8> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> [[A0]])
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+  i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> %a0, <32 x i8> %1)
+  %3 = shufflevector <32 x i8> %2, <32 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> %2, <32 x i8> %3)
+  %5 = shufflevector <32 x i8> %4, <32 x i8> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> %4, <32 x i8> %5)
+  %7 = shufflevector <32 x i8> %6, <32 x i8> poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> %6, <32 x i8> %7)
+  %9 = shufflevector <32 x i8> %8, <32 x i8> poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %10 = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> %8, <32 x i8> %9)
+  %11 = extractelement <32 x i8> %10, i64 0
+  ret i8 %11
+}
+
+define i16 @test_reduce_v16i16(<16 x i16> %a0) local_unnamed_addr #0 {
+; CHECK-LABEL: define i16 @test_reduce_v16i16(
+; CHECK-SAME: <16 x i16> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> [[A0]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %1 = shufflevector <16 x i16> %a0, <16 x i16> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %a0, <16 x i16> %1)
+  %3 = shufflevector <16 x i16> %2, <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %2, <16 x i16> %3)
+  %5 = shufflevector <16 x i16> %4, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %4, <16 x i16> %5)
+  %7 = shufflevector <16 x i16> %6, <16 x i16> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %6, <16 x i16> %7)
+  %9 = extractelement <16 x i16> %8, i64 0
+  ret i16 %9
+}
+
+define i8 @test_reduce_v64i8(<64 x i8> %a0) local_unnamed_addr #0 {
+; CHECK-LABEL: define i8 @test_reduce_v64i8(
+; CHECK-SAME: <64 x i8> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> [[A0]])
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = shufflevector <64 x i8> %a0, <64 x i8> poison, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39,
+  i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47,
+  i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55,
+  i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> %a0, <64 x i8> %1)
+  %3 = shufflevector <64 x i8> %2, <64 x i8> poison, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+  i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> %2, <64 x i8> %3)
+  %5 = shufflevector <64 x i8> %4, <64 x i8> poison, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> %4, <64 x i8> %5)
+  %7 = shufflevector <64 x i8> %6, <64 x i8> poison, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> %6, <64 x i8> %7)
+  %9 = shufflevector <64 x i8> %8, <64 x i8> poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %10 = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> %8, <64 x i8> %9)
+  %11 = shufflevector <64 x i8> %10, <64 x i8> poison, <64 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %12 = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> %10, <64 x i8> %11)
+  %13 = extractelement <64 x i8> %12, i64 0
+  ret i8 %13
+}
+
+define i16 @test_reduce_v32i16(<32 x i16> %a0) local_unnamed_addr #0 {
+; CHECK-LABEL: define i16 @test_reduce_v32i16(
+; CHECK-SAME: <32 x i16> [[A0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> [[A0]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %1 = shufflevector <32 x i16> %a0, <32 x i16> poison, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+  i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <32 x i16> @llvm.umin.v32i16(<32 x i16> %a0, <32 x i16> %1)
+  %3 = shufflevector <32 x i16> %2, <32 x i16> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <32 x i16> @llvm.umin.v32i16(<32 x i16> %2, <32 x i16> %3)
+  %5 = shufflevector <32 x i16> %4, <32 x i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <32 x i16> @llvm.umin.v32i16(<32 x i16> %4, <32 x i16> %5)
+  %7 = shufflevector <32 x i16> %6, <32 x i16> poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %8 = tail call <32 x i16> @llvm.umin.v32i16(<32 x i16> %6, <32 x i16> %7)
+  %9 = shufflevector <32 x i16> %8, <32 x i16> poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
+  i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %10 = tail call <32 x i16> @llvm.umin.v32i16(<32 x i16> %8, <32 x i16> %9)
+  %11 = extractelement <32 x i16> %10, i64 0
+  ret i16 %11
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
+; SSE: {{.*}}
diff --git a/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll b/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll
new file mode 100644
index 0000000000000..6f21eb5097fde
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=vector-combine -S | FileCheck %s
+
+define i16 @test_reduce_v8i16(<8 x i16> %a0) local_unnamed_addr #0 {
+; CHECK-LABEL: define i16 @test_reduce_v8i16(
+; CHECK-SAME: <8 x i16> [[A0:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A0]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+  %2 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a0, <8 x i16> %1)
+  %3 = shufflevector <8 x i16> %2, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %4 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %2, <8 x i16> %3)
+  %5 = shufflevector <8 x i16> %4, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %6 = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %4, <8 x i16> %5)
+  %7 = extractelement <8 x i16> %6, i64 0
+  ret i16 %7
+}



More information about the llvm-commits mailing list