[llvm] [AArch64] Fix cost modelling for saturating arithmetic intrinsics (PR #152333)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 6 09:23:12 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Mary Kassayova (marykass-arm)

<details>
<summary>Changes</summary>

The cost model previously overestimating throughput costs to wide fixed-length saturating arithmetic intrinsics when using SVE with a fixed vscale of 2. These costs ended up much higher than for the same operations using NEON, despite being fully legal and efficient with SVE. This patch adjusts the cost model to avoid penalising these intrinsics under SVE.

---

Patch is 54.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152333.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+7) 
- (modified) llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll (+187-50) 
- (modified) llvm/test/Analysis/CostModel/AArch64/arith-usat.ll (+186-49) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 9f05add8bc1c1..a112d69fbead2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -631,6 +631,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
         LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
     if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
       return LT.first * Instrs;
+    
+    TypeSize TS = getDataLayout().getTypeSizeInBits(RetTy);
+    uint64_t VectorSize = TS.getKnownMinValue();
+
+    if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
+      return LT.first * Instrs;      
+
     break;
   }
   case Intrinsic::abs: {
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
index 254715ac909e1..1ecf3980605b4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-ssat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mtriple=aarch64 < %s | FileCheck %s  --check-prefix=BASE
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -31,31 +32,31 @@ declare <32 x i8>  @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.sadd.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @add(i32 %arg) {
-; CHECK-LABEL: 'add'
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; BASE-LABEL: 'add'
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V3I32 = call <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32> undef, <3 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
   %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
@@ -87,6 +88,74 @@ define i32 @add(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @add_sve_vscale2(i32 %arg) vscale_range(2,2) {
+; BASE-LABEL: 'add_sve_vscale2'
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 poison
+;
+; SVE-LABEL: 'add_sve_vscale2'
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+; SVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison
+;
+  %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> poison, <2 x i64> poison)
+  %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> poison, <4 x i64> poison)
+  %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> poison, <8 x i64> poison)
+
+  %V2I32  = call <2 x i32>  @llvm.sadd.sat.v2i32(<2 x i32> poison, <2 x i32> poison)
+  %V4I32  = call <4 x i32>  @llvm.sadd.sat.v4i32(<4 x i32> poison, <4 x i32> poison)
+  %V8I32  = call <8 x i32>  @llvm.sadd.sat.v8i32(<8 x i32> poison, <8 x i32> poison)
+  %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> poison, <16 x i32> poison)
+
+  %V2I16  = call <2 x i16>  @llvm.sadd.sat.v2i16(<2 x i16> poison, <2 x i16> poison)
+  %V4I16  = call <4 x i16>  @llvm.sadd.sat.v4i16(<4 x i16> poison, <4 x i16> poison)
+  %V8I16  = call <8 x i16>  @llvm.sadd.sat.v8i16(<8 x i16> poison, <8 x i16> poison)
+  %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> poison, <16 x i16> poison)
+  %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> poison, <32 x i16> poison)
+
+  %V2I8  = call <2 x i8>  @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> poison)
+  %V4I8  = call <4 x i8>  @llvm.sadd.sat.v4i8(<4 x i8> poison, <4 x i8> poison)
+  %V8I8  = call <8 x i8>  @llvm.sadd.sat.v8i8(<8 x i8> poison, <8 x i8> poison)
+  %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> poison, <16 x i8> poison)
+  %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> poison, <32 x i8> poison)
+  %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> poison, <64 x i8> poison)
+
+  ret i32 poison
+}
+
 declare i64        @llvm.ssub.sat.i64(i64, i64)
 declare <2 x i64>  @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>)
 declare <4 x i64>  @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>)
@@ -114,30 +183,30 @@ declare <32 x i8>  @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)
 declare <64 x i8>  @llvm.ssub.sat.v64i8(<64 x i8>, <64 x i8>)
 
 define i32 @sub(i32 %arg) {
-; CHECK-LABEL: 'sub'
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
+; BASE-LABEL: 'sub'
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 2 for: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; BASE-NEXT:  Cost Model: Found costs of 6 for: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; BASE-NEXT:  Cost Model: Found costs of 4 for: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8>...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/152333


More information about the llvm-commits mailing list