[llvm] [AArch64] Update cost model for extracting halves from 128+ bit vectors (PR #155601)

Gaëtan Bossu via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 1 09:35:28 PDT 2025


https://github.com/gbossu updated https://github.com/llvm/llvm-project/pull/155601

>From dc6667082d2bc00e4702f1a200956e046132fdb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Mon, 23 Jun 2025 12:45:24 +0000
Subject: [PATCH 1/6] Precommit CostModel tests for 128+ bits extract shuffles

---
 .../CostModel/AArch64/shuffle-extract.ll      | 192 +++++++++++++++++-
 1 file changed, 187 insertions(+), 5 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
index 867c35ab79446..bf57e1ca7a605 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
-define void @extract_half() {
-; CHECK-LABEL: 'extract_half'
+define void @extract_half_vscale_any() {
+; CHECK-LABEL: 'extract_half_vscale_any'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v2i8_lo = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2i8_hi = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
@@ -16,6 +16,9 @@ define void @extract_half() {
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
@@ -59,6 +62,9 @@ define void @extract_half() {
   %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 
   %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> <i32 0>
   %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> <i32 1>
@@ -96,8 +102,129 @@ define void @extract_half() {
   ret void
 }
 
-define void @extract_qtr() {
-; CHECK-LABEL: 'extract_qtr'
+; Similar to the above, but for a 256-bit SVE core.
+; Vectors smaller than 256-bit aren't tested again.
+define void @extract_half_vscale2() vscale_range(2,2) {
+; CHECK-LABEL: 'extract_half_vscale2'
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+  %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+
+  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+  %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+
+  ret void
+}
+
+; Similar to the above, but for a 512-bit SVE core.
+define void @extract_half_vscale4() vscale_range(4,4) {
+; CHECK-LABEL: 'extract_half_vscale4'
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+  %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+
+  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+  %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+
+  ret void
+}
+
+define void @extract_qtr_vscale_any() {
+; CHECK-LABEL: 'extract_qtr_vscale_any'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i8_mi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i8_hi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> <i32 2>
@@ -107,6 +234,9 @@ define void @extract_qtr() {
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i16_hi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 2>
@@ -142,6 +272,9 @@ define void @extract_qtr() {
   %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 
   %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 0>
   %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 1>
@@ -172,3 +305,52 @@ define void @extract_qtr() {
 
   ret void
 }
+
+; Similar to the above, but for a 256-bit SVE core.
+; Vectors smaller than 256-bit aren't tested again.
+define void @extract_qtr_vscale2() vscale_range(2,2) {
+; CHECK-LABEL: 'extract_qtr_vscale2'
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
+; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+
+  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 0, i32 1>
+  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+
+  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 0>
+  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
+  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
+  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
+  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+
+  ret void
+}

>From b0203919ab84ef54b80c1350ca00110bb0e7cf1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Tue, 26 Aug 2025 09:13:52 +0000
Subject: [PATCH 2/6] [AArch64] Update cost model for extracting halves from
 128+ bit vectors

Previously, only 128-bit "NEON" vectors were given sensible costs.
Cores with vscale>1 can use SVE's EXT instruction to perform a
fixed-length subvector extract.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 10 ++++---
 .../CostModel/AArch64/shuffle-extract.ll      | 28 +++++++++----------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 490f6391c15a0..120f29a1aa22c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5750,11 +5750,13 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
 
   Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);
   bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
-  // A subvector extract can be implemented with an ext (or trivial extract, if
-  // from lane 0). This currently only handles low or high extracts to prevent
-  // SLP vectorizer regressions.
+  // A subvector extract can be implemented with a NEON/SVE ext (or trivial
+  // extract, if from lane 0). This currently only handles low or high extracts
+  // to prevent SLP vectorizer regressions.
+  // Note that SVE's ext instruction is destructive, but it can be fused with
+  // a movprfx to act like a constructive instruction.
   if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
-    if (LT.second.is128BitVector() &&
+    if (LT.second.getFixedSizeInBits() >= AArch64::SVEBitsPerBlock &&
         cast<FixedVectorType>(SubTp)->getNumElements() ==
             LT.second.getVectorNumElements() / 2) {
       if (Index == 0)
diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
index bf57e1ca7a605..8199f26b9c11c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
@@ -108,25 +108,25 @@ define void @extract_half_vscale2() vscale_range(2,2) {
 ; CHECK-LABEL: 'extract_half_vscale2'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -168,28 +168,28 @@ define void @extract_half_vscale4() vscale_range(4,4) {
 ; CHECK-LABEL: 'extract_half_vscale4'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -320,13 +320,13 @@ define void @extract_qtr_vscale2() vscale_range(2,2) {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;

>From 92a30831667f2dd8d529bf36ad32273148225e7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Thu, 28 Aug 2025 08:19:29 +0000
Subject: [PATCH 3/6] Replace SVEBitsPerBlock with plain 128

---
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 120f29a1aa22c..0df496735490c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5751,12 +5751,13 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
   Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);
   bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
   // A subvector extract can be implemented with a NEON/SVE ext (or trivial
-  // extract, if from lane 0). This currently only handles low or high extracts
-  // to prevent SLP vectorizer regressions.
+  // extract, if from lane 0) for 128-bit NEON vectors or legal SVE vectors.
+  // This currently only handles low or high extracts to prevent SLP vectorizer
+  // regressions.
   // Note that SVE's ext instruction is destructive, but it can be fused with
   // a movprfx to act like a constructive instruction.
   if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
-    if (LT.second.getFixedSizeInBits() >= AArch64::SVEBitsPerBlock &&
+    if (LT.second.getFixedSizeInBits() >= 128 &&
         cast<FixedVectorType>(SubTp)->getNumElements() ==
             LT.second.getVectorNumElements() / 2) {
       if (Index == 0)

>From 8d57a656271ea982687c75a410bada16c61231ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Mon, 1 Sep 2025 13:42:42 +0000
Subject: [PATCH 4/6] Restore NEON shuffle-extract.ll test

I'll create a new one for VLS SVE.
---
 .../CostModel/AArch64/shuffle-extract.ll      | 192 +-----------------
 1 file changed, 5 insertions(+), 187 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
index 8199f26b9c11c..867c35ab79446 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
-define void @extract_half_vscale_any() {
-; CHECK-LABEL: 'extract_half_vscale_any'
+define void @extract_half() {
+; CHECK-LABEL: 'extract_half'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v2i8_lo = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2i8_hi = shufflevector <2 x i8> poison, <2 x i8> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <2 x i32> <i32 0, i32 1>
@@ -16,9 +16,6 @@ define void @extract_half_vscale_any() {
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
@@ -62,9 +59,6 @@ define void @extract_half_vscale_any() {
   %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 
   %v2i16_lo = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> <i32 0>
   %v2i16_hi = shufflevector <2 x i16> poison, <2 x i16> poison, <1 x i32> <i32 1>
@@ -102,129 +96,8 @@ define void @extract_half_vscale_any() {
   ret void
 }
 
-; Similar as above, but for a 256-bit SVE core.
-; Vectors smaller than 256-bit aren't tested again.
-define void @extract_half_vscale2() vscale_range(2,2) {
-; CHECK-LABEL: 'extract_half_vscale2'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
-;
-  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
-  %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-
-  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-  %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-
-  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-
-  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
-  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-
-  ret void
-}
-
-; Similar as above, but for a 512-bit SVE core.
-define void @extract_half_vscale4() vscale_range(4,4) {
-; CHECK-LABEL: 'extract_half_vscale4'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:128 CodeSize:64 Lat:128 SizeLat:128 for: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
-;
-  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
-  %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-
-  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
-  %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-
-  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
-  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-
-  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
-  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-
-  ret void
-}
-
-define void @extract_qtr_vscale_any() {
-; CHECK-LABEL: 'extract_qtr_vscale_any'
+define void @extract_qtr() {
+; CHECK-LABEL: 'extract_qtr'
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i8_lo = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i8_mi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i8_hi = shufflevector <4 x i8> poison, <4 x i8> poison, <1 x i32> <i32 2>
@@ -234,9 +107,6 @@ define void @extract_qtr_vscale_any() {
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:  Cost Model: Found costs of 8 for: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of 16 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 1>
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v4i16_hi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 2>
@@ -272,9 +142,6 @@ define void @extract_qtr_vscale_any() {
   %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 
   %v4i16_lo = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 0>
   %v4i16_mi = shufflevector <4 x i16> poison, <4 x i16> poison, <1 x i32> <i32 1>
@@ -305,52 +172,3 @@ define void @extract_qtr_vscale_any() {
 
   ret void
 }
-
-; Similar as above, but for a 256-bit SVE core.
-; Vectors smaller than 256-bit aren't tested again.
-define void @extract_qtr_vscale2() vscale_range(2,2) {
-; CHECK-LABEL: 'extract_qtr_vscale2'
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
-; CHECK-NEXT:  Cost Model: Found costs of 0 for: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
-;
-  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-
-  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-
-  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 0, i32 1>
-  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
-  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
-  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
-
-  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 0>
-  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
-  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
-  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
-  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
-  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
-
-  ret void
-}

>From 8d3248507920f1aedb5b75f1590be9e434997203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Mon, 1 Sep 2025 13:56:53 +0000
Subject: [PATCH 5/6] Add SVE-specific cost test for VLS

---
 .../CostModel/AArch64/shuffle-extract.ll      |   3 +
 .../AArch64/sve-vls-shuffle-extract.ll        | 339 ++++++++++++++++++
 2 files changed, 342 insertions(+)
 create mode 100644 llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll

diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
index 867c35ab79446..07764fbf4acf3 100644
--- a/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-extract.ll
@@ -1,6 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output | FileCheck %s
 
+; This tests the cost of fixed-length subvector extracts for NEON.
+; For the SVE equivalent test, see sve-vls-shuffle-extract.ll
+
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @extract_half() {
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll
new file mode 100644
index 0000000000000..65261a8b2c6d6
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-vls-shuffle-extract.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" 2>&1 -disable-output \
+; RUN:    -aarch64-sve-vector-bits-min=128 \
+; RUN:    | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-1
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" 2>&1 -disable-output \
+; RUN:    -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 \
+; RUN:    | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-1
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" 2>&1 -disable-output \
+; RUN:    -aarch64-sve-vector-bits-min=256 \
+; RUN:    | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-2
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" 2>&1 -disable-output \
+; RUN:    -aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 \
+; RUN:    | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-2
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" 2>&1 -disable-output \
+; RUN:    -aarch64-sve-vector-bits-min=512 \
+; RUN:    | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-4
+; RUN: opt < %s -mtriple=aarch64 -mattr=+sve -passes="print<cost-model>" 2>&1 -disable-output \
+; RUN:    -aarch64-sve-vector-bits-min=512 -aarch64-sve-vector-bits-max=512 \
+; RUN:    | FileCheck %s --check-prefixes=VSCALE-ANY,VSCALE-4
+
+; This tests the cost of fixed-length subvector extracts for SVE, with either
+; a minimum vscale or a fixed vscale (aka VLS, i.e. vector-length-specific).
+; For the NEON equivalent test, see shuffle-extract.ll
+
+define void @extract_half_lo() {
+; VSCALE-ANY-LABEL: 'extract_half_lo'
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v2i64_lo = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> zeroinitializer
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+  %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+  %v2i64_lo = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> <i32 0>
+  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+  ret void
+}
+
+define void @extract_half_hi() {
+; VSCALE-1-LABEL: 'extract_half_hi'
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> <i32 1>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-2-LABEL: 'extract_half_hi'
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> <i32 1>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-4-LABEL: 'extract_half_hi'
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> <i32 1>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+
+  %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+  %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %v2i64_hi = shufflevector <2 x i64> poison, <2 x i64> poison, <1 x i32> <i32 1>
+  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+
+  ret void
+}
+
+define void @extract_half_unaligned() {
+; VSCALE-1-LABEL: 'extract_half_unaligned'
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-2-LABEL: 'extract_half_unaligned'
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-4-LABEL: 'extract_half_unaligned'
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39>
+
+  %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+
+  %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <2 x i32> <i32 1, i32 2>
+  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+
+  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <2 x i32> <i32 1, i32 2>
+  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+
+  ret void
+}
+
+define void @extract_qtr_lo() {
+; VSCALE-ANY-LABEL: 'extract_qtr_lo'
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> zeroinitializer
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 0, i32 1>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> zeroinitializer
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
+; VSCALE-ANY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v16i8_lo = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v32i8_lo = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v64i8_lo = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  %v8i16_lo = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
+  %v16i16_lo = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v32i16_lo = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+  %v4i32_lo = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 0>
+  %v8i32_lo = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 0, i32 1>
+  %v16i32_lo = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+  %v4i64_lo = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 0>
+  %v8i64_lo = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
+
+  ret void
+}
+
+define void @extract_qtr_hi() {
+; VSCALE-1-LABEL: 'extract_qtr_hi'
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 2>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-2-LABEL: 'extract_qtr_hi'
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 2>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-4-LABEL: 'extract_qtr_hi'
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 2>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v16i8_hi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %v32i8_hi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v64i8_hi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+
+  %v8i16_hi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 4, i32 5>
+  %v16i16_hi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+  %v32i16_hi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+  %v4i32_hi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 2>
+  %v8i32_hi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 4, i32 5>
+  %v16i32_hi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+
+  %v4i64_hi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 2>
+  %v8i64_hi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 4, i32 5>
+
+  ret void
+}
+
+define void @extract_qtr_unaligned() {
+; VSCALE-1-LABEL: 'extract_qtr_unaligned'
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 1>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-2-LABEL: 'extract_qtr_unaligned'
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 1>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; VSCALE-4-LABEL: 'extract_qtr_unaligned'
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 1>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+; VSCALE-4-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v16i8_mi = shufflevector <16 x i8> poison, <16 x i8> poison, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  %v32i8_mi = shufflevector <32 x i8> poison, <32 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %v64i8_mi = shufflevector <64 x i8> poison, <64 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+  %v8i16_mi = shufflevector <8 x i16> poison, <8 x i16> poison, <2 x i32> <i32 2, i32 3>
+  %v16i16_mi = shufflevector <16 x i16> poison, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v32i16_mi = shufflevector <32 x i16> poison, <32 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+
+  %v4i32_mi = shufflevector <4 x i32> poison, <4 x i32> poison, <1 x i32> <i32 1>
+  %v8i32_mi = shufflevector <8 x i32> poison, <8 x i32> poison, <2 x i32> <i32 2, i32 3>
+  %v16i32_mi = shufflevector <16 x i32> poison, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+
+  %v4i64_mi = shufflevector <4 x i64> poison, <4 x i64> poison, <1 x i32> <i32 1>
+  %v8i64_mi = shufflevector <8 x i64> poison, <8 x i64> poison, <2 x i32> <i32 2, i32 3>
+
+  ret void
+}

>From e0d3f2d9a5053663b2bd9fbc2e59c7a696f7cd76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <gaetan.bossu at arm.com>
Date: Mon, 1 Sep 2025 16:31:23 +0000
Subject: [PATCH 6/6] Fix typo

---
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0df496735490c..29bfd9eee785e 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5754,7 +5754,7 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
   // extract, if from lane 0) for 128-bit NEON vectors or legal SVE vectors.
   // This currently only handles low or high extracts to prevent SLP vectorizer
   // regressions.
-  // Note that SVE's ext instruciton is destructive, but it can be fused with
+  // Note that SVE's ext instruction is destructive, but it can be fused with
   // a movprfx to act like a constructive instruction.
   if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
     if (LT.second.getFixedSizeInBits() >= 128 &&



More information about the llvm-commits mailing list