[llvm] [AArch64] Fix cost modelling for saturating arithmetic intrinsics (PR #152333)

Mary Kassayova via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 6 09:22:39 PDT 2025


https://github.com/marykass-arm created https://github.com/llvm/llvm-project/pull/152333

The cost model was previously overestimating throughput costs for wide fixed-length saturating arithmetic intrinsics when using SVE with a fixed vscale of 2. These costs ended up much higher than for the same operations using NEON, despite being fully legal and efficient with SVE. This patch adjusts the cost model to avoid penalising these intrinsics under SVE.

>From bf9d89a536388b70d444be065a466a5136808e72 Mon Sep 17 00:00:00 2001
From: Mary Kassayova <mary.kassayova at arm.com>
Date: Wed, 6 Aug 2025 10:19:22 +0000
Subject: [PATCH] [AArch64] Fix cost modelling for saturating arithmetic
 intrinsics

Change-Id: I0ecfbe0063d58ca04969c2fe3048701c1c1711e8
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |   7 +
 .../Analysis/CostModel/AArch64/arith-ssat.ll  | 237 ++++++++++++++----
 .../Analysis/CostModel/AArch64/arith-usat.ll  | 235 +++++++++++++----
 3 files changed, 380 insertions(+), 99 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 9f05add8bc1c1..a112d69fbead2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -631,6 +631,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
         LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
     if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
       return LT.first * Instrs;
+    
+    TypeSize TS = getDataLayout().getTypeSizeInBits(RetTy);
+    uint64_t VectorSize = TS.getKnownMinValue();
+
+    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
+      return LT.first * Instrs;      
+
     break;
   }
   case Intrinsic::abs: {
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
index 254715ac909e1..1ecf3980605b4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s  --check-prefix=BASE
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -31,31 +32,31 @@ declare <32 x i8>  @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @add(i32 %arg) {
-; CHECK-LABEL: 'add'
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; BASE-LABEL: 'add'
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -87,6 +88,74 @@ define i32 @add(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'add_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'add_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
+
 declare i64        @llvm.ssub.sat.i64(i64, i64)
 declare <2 x i64>  @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64>  @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
@@ -114,30 +183,30 @@ declare <32 x i8>  @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @sub(i32 %arg) {
-; CHECK-LABEL: 'sub'
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; BASE-LABEL: 'sub'
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -167,3 +236,71 @@ define i32 @sub(i32 %arg) {
 
   ret i32 undef
 }
+
+define i32 @sub_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'sub_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'sub_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.ssub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.ssub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.ssub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.ssub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.ssub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.ssub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.ssub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.ssub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.ssub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll
index dba42b1e6ebb0..4993640fddcea 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-usat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s --check-prefix=BASE
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -30,30 +31,30 @@ declare <32 x i8>  @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.uadd.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @add(i32 %arg) {
-; CHECK-LABEL: 'add'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; BASE-LABEL: 'add'
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -84,6 +85,74 @@ define i32 @add(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) { ; Cost checks for fixed-width llvm.uadd.sat calls in a function whose vscale_range is (2,2).
+; BASE-LABEL: 'add_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'add_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison) ; i64 element vectors start here, 128 to 512 bits wide.
+  %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+  ; i32 element vectors, 64 to 512 bits wide.
+  %V2I32  = call <2 x i32>  @llvm.uadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.uadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.uadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+  ; i16 element vectors, 32 to 512 bits wide.
+  %V2I16  = call <2 x i16>  @llvm.uadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.uadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.uadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+  ; i8 element vectors, 16 to 512 bits wide.
+  %V2I8  = call <2 x i8>  @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.uadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.uadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+  ; Per the check lines above, the 256-bit and 512-bit calls are expected to cost 1 and 2 in the run with +sve, versus 2 and 4 in the run without it.
+  ret i32 poison
+}
+
 declare i64        @llvm.usub.sat.i64(i64, i64)
 declare <2 x i64>  @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64>  @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>)
@@ -111,30 +180,30 @@ declare <32 x i8>  @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.usub.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @sub(i32 %arg) {
-; CHECK-LABEL: 'sub'
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; BASE-LABEL: 'sub'
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -164,3 +233,71 @@ define i32 @sub(i32 %arg) {
 
   ret i32 undef
 }
+
+define i32 @sub_sve_vscale2(i32 %arg) vscale_range(2,2) { ; Cost checks for fixed-width llvm.usub.sat calls in a function whose vscale_range is (2,2).
+; BASE-LABEL: 'sub_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'sub_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> poison, <2 x i64> poison) ; i64 element vectors start here, 128 to 512 bits wide.
+  %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+  ; i32 element vectors, 64 to 512 bits wide.
+  %V2I32  = call <2 x i32>  @llvm.usub.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.usub.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.usub.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+  ; i16 element vectors, 32 to 512 bits wide.
+  %V2I16  = call <2 x i16>  @llvm.usub.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.usub.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.usub.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+  ; i8 element vectors, 16 to 512 bits wide.
+  %V2I8  = call <2 x i8>  @llvm.usub.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.usub.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.usub.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+  ; Per the check lines above, the 256-bit and 512-bit calls are expected to cost 1 and 2 in the run with +sve, versus 2 and 4 in the run without it.
+  ret i32 poison
+}



More information about the llvm-commits mailing list