[llvm] 85bc868 - [AArch64][TTI] Reduce cost for splatting whole first vector segment (SVE) (#145701)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 2 01:51:59 PDT 2025


Author: Graham Hunter
Date: 2025-07-02T09:51:56+01:00
New Revision: 85bc8684173bb175fd4372a11b3fa3ef5455612f

URL: https://github.com/llvm/llvm-project/commit/85bc8684173bb175fd4372a11b3fa3ef5455612f
DIFF: https://github.com/llvm/llvm-project/commit/85bc8684173bb175fd4372a11b3fa3ef5455612f.diff

LOG: [AArch64][TTI] Reduce cost for splatting whole first vector segment (SVE) (#145701)

Improve cost modeling for splatting the first 128b segment.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index e9bc6d947b0d9..f7beca1b8b77e 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6750,6 +6750,21 @@ inline std::optional<unsigned> isDUPQMask(ArrayRef<int> Mask, unsigned Segments,
   return std::nullopt;
 }
 
+/// isDUPFirstSegmentMask - matches a splat of the first 128b segment.
+inline bool isDUPFirstSegmentMask(ArrayRef<int> Mask, unsigned Segments,
+                                  unsigned SegmentSize) {
+  // Make sure there are no size changes.
+  if (SegmentSize * Segments != Mask.size())
+    return false;
+
+  // Check that all lanes refer to the equivalent lane in the first segment.
+  // Undef/poison lanes (<0) are also accepted.
+  return all_of(enumerate(Mask), [&](auto P) {
+    const unsigned IndexWithinSegment = P.index() % SegmentSize;
+    return P.value() < 0 || unsigned(P.value()) == IndexWithinSegment;
+  });
+}
+
 } // namespace llvm
 
 #endif

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 79b2dc2b3845e..66252f737ddb9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5600,9 +5600,8 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
   }
 
   // Segmented shuffle matching.
-  if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
-      ST->isSVEorStreamingSVEAvailable() && Kind == TTI::SK_PermuteSingleSrc &&
-      isa<FixedVectorType>(SrcTy) && !Mask.empty() &&
+  if (Kind == TTI::SK_PermuteSingleSrc && isa<FixedVectorType>(SrcTy) &&
+      !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
       SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
           AArch64::SVEBitsPerBlock)) {
 
@@ -5612,7 +5611,14 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
     unsigned SegmentElts = VTy->getNumElements() / Segments;
 
     // dupq zd.t, zn.t[idx]
-    if (isDUPQMask(Mask, Segments, SegmentElts))
+    if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
+        ST->isSVEorStreamingSVEAvailable() &&
+        isDUPQMask(Mask, Segments, SegmentElts))
+      return LT.first;
+
+    // mov zd.q, vn
+    if (ST->isSVEorStreamingSVEAvailable() &&
+        isDUPFirstSegmentMask(Mask, Segments, SegmentElts))
       return LT.first;
   }
 

diff  --git a/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll b/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll
index 790f49f1d3b82..8b94cefbad63b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/segmented-shufflevector-patterns.ll
@@ -49,5 +49,53 @@ define void @dup_within_each_segment_512b() #1 {
   ret void
 }
 
+define void @dup_whole_segment_256b() #0 {
+; CHECK-LABEL: 'dup_whole_segment_256b'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+                                                                             i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                                                                               i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+                                                                            i32 0, i32 1, i32 2, i32 3>
+  %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1,
+                                                                                       i32 0, i32 1, i32 0, i32 1>
+  %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3,
+                                                                                            i32 poison, i32 1, i32 2, i32 3>
+  ret void
+}
+
+define void @dup_whole_segment_512b() #1 {
+; CHECK-LABEL: 'dup_whole_segment_512b'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %dup_seg_b = shufflevector <32 x i8> poison, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+                                                                             i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %dup_seg_h = shufflevector <16 x i16> poison, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+                                                                               i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %dup_seg_s = shufflevector <8 x i32> poison, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+                                                                            i32 0, i32 1, i32 2, i32 3>
+  %dup_seg_d = shufflevector <4 x i64> poison, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %dup_seg_512b_d = shufflevector <8 x double> poison, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1,
+                                                                                       i32 0, i32 1, i32 0, i32 1>
+  %dup_seg_s_with_poison = shufflevector <8 x float> poison, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 3,
+                                                                                            i32 poison, i32 1, i32 2, i32 3>
+  ret void
+}
+
 attributes #0 = { noinline vscale_range(2,2) }
 attributes #1 = { noinline vscale_range(4,4) }


        


More information about the llvm-commits mailing list