[llvm] [AArch64] Add ZIP and UZP shuffle costs. (PR #88150)

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 9 09:27:21 PDT 2024


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/88150

This adds some costs for the shuffle instructions that should be lowered to zip1/zip2/uzp1/uzp2 instructions.

>From e046146da26b04904b7858660e6eebefd6a73143 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 9 Apr 2024 17:24:55 +0100
Subject: [PATCH] [AArch64] Add ZIP and UZP shuffle costs.

This adds some costs for the shuffle instructions that should be lowered to
zip1/zip2/uzp1/uzp2 instructions.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 29 --------
 .../Target/AArch64/AArch64PerfectShuffle.h    | 33 ++++++++++
 .../AArch64/AArch64TargetTransformInfo.cpp    | 10 +++
 .../CostModel/AArch64/shuffle-other.ll        | 66 +++++++++----------
 .../CostModel/AArch64/shuffle-store.ll        | 14 ++--
 .../AArch64/vecreduce-shuffle.ll              |  2 +-
 6 files changed, 84 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 819e8ccd5c33f0..43a0f960577bd2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11851,35 +11851,6 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
   return true;
 }
 
-static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  if (NumElts % 2 != 0)
-    return false;
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  unsigned Idx = WhichResult * NumElts / 2;
-  for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
-        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
-      return false;
-    Idx += 1;
-  }
-
-  return true;
-}
-
-static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  for (unsigned i = 0; i != NumElts; ++i) {
-    if (M[i] < 0)
-      continue; // ignore UNDEF indices
-    if ((unsigned)M[i] != 2 * i + WhichResult)
-      return false;
-  }
-
-  return true;
-}
-
 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   unsigned NumElts = VT.getVectorNumElements();
   if (NumElts % 2 != 0)
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index 5846fd454b6548..7abaead694d114 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -16,6 +16,8 @@
 
 #include "llvm/ADT/ArrayRef.h"
 
+namespace llvm {
+
 // 31 entries have cost 0
 // 756 entries have cost 1
 // 3690 entries have cost 2
@@ -6618,4 +6620,35 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
   return (PFEntry >> 30) + 1;
 }
 
+inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+  unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts % 2 != 0)
+    return false;
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
+        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
+      return false;
+    Idx += 1;
+  }
+
+  return true;
+}
+
+inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (M[i] < 0)
+      continue; // ignore UNDEF indices
+    if ((unsigned)M[i] != 2 * i + WhichResult)
+      return false;
+  }
+
+  return true;
+}
+
+} // namespace llvm
+
 #endif
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 20150d7386753f..bd943de06b4b2a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3932,6 +3932,16 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
       }))
     return 0;
 
+  // Check for other shuffles that are not SK_ kinds but we have native
+  // instructions for, for example ZIP and UZP.
+  unsigned Unused;
+  if (LT.second.isFixedLengthVector() &&
+      LT.second.getVectorNumElements() == Mask.size() &&
+      (Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) &&
+      (isZIPMask(Mask, LT.second, Unused) ||
+       isUZPMask(Mask, LT.second, Unused)))
+    return 1;
+
   if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
       Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc ||
       Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) {
diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
index d469ce63059322..6c45ebcb69f400 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll
@@ -171,28 +171,28 @@ define void @zip() {
 ; CHECK-LABEL: 'zip'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %zipv2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zip1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %zip2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %zipv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %zip1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %zip2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %zipv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %zip1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %zip2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %zipv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zipv2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zipv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zipv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zipv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zipv2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %zipv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %zip1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %zip2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %zipv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %zip1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %zip2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %zipv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zipv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zipv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zip1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zip2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %zipv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zipv2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -276,24 +276,24 @@ define void @zip() {
 
 define void @uzp() {
 ; CHECK-LABEL: 'uzp'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %uzp1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %uzp2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %uzpv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %uzp1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %uzp2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %uzpv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %uzp1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %uzp2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %uzpv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uzpv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %uzpv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %uzp1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %uzp2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %uzpv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %uzp1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %uzp2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %uzpv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uzpv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uzp1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uzp2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %uzpv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uzpv4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
@@ -360,10 +360,10 @@ define void @uzp() {
 
 define void @multipart() {
 ; CHECK-LABEL: 'multipart'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v16a = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16b = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16c = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v16d = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16c = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v16d = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32a = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32a4 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32idrev = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 15, i32 14, i32 13, i32 12, i32 16, i32 17, i32 18, i32 19, i32 31, i32 30, i32 29, i32 28>
diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll
index 12de334574f5cf..cd434afddc9a99 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll
@@ -5,21 +5,21 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @vst2(ptr %p) {
 ; CHECK-LABEL: 'vst2'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16i8, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v4i16, ptr %p, align 8
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v8i16, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v4i32, ptr %p, align 16
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
index c505cb7b181c20..d69cb75664a8ca 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
@@ -416,7 +416,7 @@ define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) {
 
 define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i16(
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 8>
 ; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
 ; CHECK-NEXT:    ret i16 [[R]]



More information about the llvm-commits mailing list