[llvm] 052225d - [AArch64] Use a lower CodeSize cost in getScalarizationOverhead.

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 11 12:18:30 PDT 2025


Author: David Green
Date: 2025-04-11T20:18:26+01:00
New Revision: 052225dc0366ddf56631b1e4104ef09896f1139b

URL: https://github.com/llvm/llvm-project/commit/052225dc0366ddf56631b1e4104ef09896f1139b
DIFF: https://github.com/llvm/llvm-project/commit/052225dc0366ddf56631b1e4104ef09896f1139b.diff

LOG: [AArch64] Use a lower CodeSize cost in getScalarizationOverhead.

This is a follow-on to #130946 that applies the same CodeSize cost override
in getScalarizationOverhead for vector instructions.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/bswap.ll
    llvm/test/Analysis/CostModel/AArch64/ctlz.ll
    llvm/test/Analysis/CostModel/AArch64/cttz.ll
    llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
    llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
    llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1d8277182add..ca1a486901951 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3832,8 +3832,9 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead(
   if (Ty->getElementType()->isFloatingPointTy())
     return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                            CostKind);
-  return DemandedElts.popcount() * (Insert + Extract) *
-         ST->getVectorInsertExtractBaseCost();
+  unsigned VecInstCost =
+      CostKind == TTI::TCK_CodeSize ? 1 : ST->getVectorInsertExtractBaseCost();
+  return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
 }
 
 InstructionCost AArch64TTIImpl::getArithmeticInstrCost(

diff  --git a/llvm/test/Analysis/CostModel/AArch64/bswap.ll b/llvm/test/Analysis/CostModel/AArch64/bswap.ll
index 8dad1f218577a..2df508ebe40bc 100644
--- a/llvm/test/Analysis/CostModel/AArch64/bswap.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/bswap.ll
@@ -44,7 +44,7 @@ define void @neon() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2i64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4i64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v3i32 = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of 12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v4i16 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/ctlz.ll b/llvm/test/Analysis/CostModel/AArch64/ctlz.ll
index 4ad359d4d2c68..34e7f87028477 100644
--- a/llvm/test/Analysis/CostModel/AArch64/ctlz.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/ctlz.ll
@@ -55,7 +55,7 @@ declare i8 @llvm.ctlz.i8(i8)
 define <2 x i64> @test_ctlz_v2i64(<2 x i64> %a) {
 ;
 ; CHECK-LABEL: 'test_ctlz_v2i64'
-; CHECK-NEXT:  Cost Model: Found costs of 10 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
 ;
   %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
@@ -154,7 +154,7 @@ define <16 x i8> @test_ctlz_v16i8(<16 x i8> %a) {
 
 define <4 x i64> @test_ctlz_v4i64(<4 x i64> %a) {
 ; CHECK-LABEL: 'test_ctlz_v4i64'
-; CHECK-NEXT:  Cost Model: Found costs of 20 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
 ;
   %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/cttz.ll b/llvm/test/Analysis/CostModel/AArch64/cttz.ll
index 021eb73234f31..6f7810372388a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cttz.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cttz.ll
@@ -55,7 +55,7 @@ declare i8 @llvm.cttz.i8(i8)
 define <2 x i64> @test_cttz_v2i64(<2 x i64> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v2i64'
-; CHECK-NEXT:  Cost Model: Found costs of 10 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
 ;
   %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
@@ -65,7 +65,7 @@ define <2 x i64> @test_cttz_v2i64(<2 x i64> %a) {
 define <2 x i32> @test_cttz_v2i32(<2 x i32> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v2i32'
-; CHECK-NEXT:  Cost Model: Found costs of 10 for: %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %cttz
 ;
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
@@ -75,7 +75,7 @@ define <2 x i32> @test_cttz_v2i32(<2 x i32> %a) {
 define <4 x i32> @test_cttz_v4i32(<4 x i32> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v4i32'
-; CHECK-NEXT:  Cost Model: Found costs of 20 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
 ;
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
@@ -85,7 +85,7 @@ define <4 x i32> @test_cttz_v4i32(<4 x i32> %a) {
 define <2 x i16> @test_cttz_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v2i16'
-; CHECK-NEXT:  Cost Model: Found costs of 10 for: %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %cttz
 ;
   %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
@@ -95,7 +95,7 @@ define <2 x i16> @test_cttz_v2i16(<2 x i16> %a) {
 define <4 x i16> @test_cttz_v4i16(<4 x i16> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v4i16'
-; CHECK-NEXT:  Cost Model: Found costs of 20 for: %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %cttz
 ;
   %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
@@ -105,7 +105,7 @@ define <4 x i16> @test_cttz_v4i16(<4 x i16> %a) {
 define <8 x i16> @test_cttz_v8i16(<8 x i16> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v8i16'
-; CHECK-NEXT:  Cost Model: Found costs of 40 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
 ;
   %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
@@ -115,7 +115,7 @@ define <8 x i16> @test_cttz_v8i16(<8 x i16> %a) {
 define <2 x i8> @test_cttz_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v2i8'
-; CHECK-NEXT:  Cost Model: Found costs of 10 for: %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %cttz
 ;
   %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
@@ -125,7 +125,7 @@ define <2 x i8> @test_cttz_v2i8(<2 x i8> %a) {
 define <4 x i8> @test_cttz_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v4i8'
-; CHECK-NEXT:  Cost Model: Found costs of 20 for: %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %cttz
 ;
   %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
@@ -135,7 +135,7 @@ define <4 x i8> @test_cttz_v4i8(<4 x i8> %a) {
 define <8 x i8> @test_cttz_v8i8(<8 x i8> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v8i8'
-; CHECK-NEXT:  Cost Model: Found costs of 40 for: %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %cttz
 ;
   %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
@@ -145,7 +145,7 @@ define <8 x i8> @test_cttz_v8i8(<8 x i8> %a) {
 define <16 x i8> @test_cttz_v16i8(<16 x i8> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v16i8'
-; CHECK-NEXT:  Cost Model: Found costs of 80 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:48 Lat:80 SizeLat:80 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
 ;
   %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
@@ -155,7 +155,7 @@ define <16 x i8> @test_cttz_v16i8(<16 x i8> %a) {
 define <4 x i64> @test_cttz_v4i64(<4 x i64> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v4i64'
-; CHECK-NEXT:  Cost Model: Found costs of 20 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
 ;
   %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
@@ -165,7 +165,7 @@ define <4 x i64> @test_cttz_v4i64(<4 x i64> %a) {
 define <8 x i32> @test_cttz_v8i32(<8 x i32> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v8i32'
-; CHECK-NEXT:  Cost Model: Found costs of 40 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
 ;
   %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
@@ -175,7 +175,7 @@ define <8 x i32> @test_cttz_v8i32(<8 x i32> %a) {
 define <16 x i16> @test_cttz_v16i16(<16 x i16> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v16i16'
-; CHECK-NEXT:  Cost Model: Found costs of 80 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:48 Lat:80 SizeLat:80 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
 ;
   %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
@@ -185,7 +185,7 @@ define <16 x i16> @test_cttz_v16i16(<16 x i16> %a) {
 define <32 x i8> @test_cttz_v32i8(<32 x i8> %a) {
 ;
 ; CHECK-LABEL: 'test_cttz_v32i8'
-; CHECK-NEXT:  Cost Model: Found costs of 160 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:160 CodeSize:96 Lat:160 SizeLat:160 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
 ;
   %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
index 5a1ed4e7e07ae..56ae1ac86c825 100644
--- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
@@ -5,24 +5,24 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @fixed() {
 ; CHECK-LABEL: 'fixed'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:39 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:156 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:48 SizeLat:48 for: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:80 CodeSize:64 Lat:96 SizeLat:96 for: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:40 CodeSize:32 Lat:48 SizeLat:48 for: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:15 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:31 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:18 CodeSize:15 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:124 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 entry:

diff  --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
index fb54f8de023cd..ae638e5dd366d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -190,11 +190,11 @@ declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>,
 define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
 ; CHECK:         gather_load_4xi8_constant_mask
 ; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of 20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask'
@@ -212,11 +212,11 @@ define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
 define <4 x i8> @gather_load_4xi8_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         gather_load_4xi8_variable_mask
 ; CHECK-NEON-LABEL: 'gather_load_4xi8_variable_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_variable_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_variable_mask'
@@ -235,11 +235,11 @@ declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4
 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
 ; CHECK:         scatter_store_4xi8_constant_mask
 ; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask'
@@ -257,11 +257,11 @@ define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
 define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         scatter_store_4xi8_variable_mask
 ; CHECK-NEON-LABEL: 'scatter_store_4xi8_variable_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_variable_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_variable_mask'
@@ -280,11 +280,11 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>
 define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
 ; CHECK:         gather_load_4xi32_constant_mask
 ; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of 20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask'
@@ -302,11 +302,11 @@ define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
 define <4 x i32> @gather_load_4xi32_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         gather_load_4xi32_variable_mask
 ; CHECK-NEON-LABEL: 'gather_load_4xi32_variable_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_variable_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_variable_mask'
@@ -325,11 +325,11 @@ declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <
 define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) {
 ; CHECK:         scatter_store_4xi32_constant_mask
 ; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask'
@@ -347,11 +347,11 @@ define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs)
 define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         scatter_store_4xi32_variable_mask
 ; CHECK-NEON-LABEL: 'scatter_store_4xi32_variable_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_variable_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_variable_mask'
@@ -370,11 +370,11 @@ declare <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr>, i32, <256 x
 define void @sve_gather_vls(<256 x i1> %v256i1mask) {
 ; CHECK-LABEL: 'sve_scatter_vls'
 ; CHECK-NEON-LABEL: 'sve_gather_vls'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'sve_gather_vls'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'sve_gather_vls'
@@ -394,11 +394,11 @@ declare <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr>, i32, <256
 define void @sve_gather_vls_float(<256 x i1> %v256i1mask) {
 ; CHECK-LABEL: 'sve_gather_vls_float'
 ; CHECK-NEON-LABEL: 'sve_gather_vls_float'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1664 CodeSize:1728 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1664 CodeSize:1216 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'sve_gather_vls_float'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:1664 CodeSize:1728 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:1664 CodeSize:1216 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'sve_gather_vls_float'
@@ -418,11 +418,11 @@ declare void @llvm.masked.scatter.v256i8.v256p0(<256 x i8>, <256 x ptr>, i32, <2
 define void @sve_scatter_vls(<256 x i1> %v256i1mask){
 ; CHECK-LABEL: 'sve_scatter_vls'
 ; CHECK-NEON-LABEL: 'sve_scatter_vls'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'sve_scatter_vls'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'sve_scatter_vls'
@@ -442,11 +442,11 @@ declare void @llvm.masked.scatter.v512f16.v512p0(<512 x half>, <512 x ptr>, i32,
 define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){
 ; CHECK-LABEL: 'sve_scatter_vls_float'
 ; CHECK-NEON-LABEL: 'sve_scatter_vls_float'
-; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:3456 CodeSize:3520 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:3456 CodeSize:2496 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
 ; CHECK-NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'sve_scatter_vls_float'
-; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:3456 CodeSize:3520 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:3456 CodeSize:2496 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'sve_scatter_vls_float'

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 308a3785c9f05..1483d476bef0d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1103,15 +1103,15 @@ define <vscale x 8 x i32> @masked_gather_nxv8i32(<vscale x 8 x ptr> %ld, <vscale
 
 define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
 ; CHECK-VSCALE-1-LABEL: 'masked_gather_v4i32'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res
 ;
 ; CHECK-VSCALE-2-LABEL: 'masked_gather_v4i32'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res
 ;
 ; TYPE_BASED_ONLY-LABEL: 'masked_gather_v4i32'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 36 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:36 CodeSize:20 Lat:36 SizeLat:36 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res
 ;
   %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
@@ -1120,11 +1120,11 @@ define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32>
 
 define <1 x i128> @masked_gather_v1i128(<1 x ptr> %ld, <1 x i1> %masks, <1 x i128> %passthru) {
 ; CHECK-VSCALE-1-LABEL: 'masked_gather_v1i128'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <1 x i128> %res
 ;
 ; CHECK-VSCALE-2-LABEL: 'masked_gather_v1i128'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <1 x i128> %res
 ;
 ; TYPE_BASED_ONLY-LABEL: 'masked_gather_v1i128'
@@ -1173,15 +1173,15 @@ define void @masked_scatter_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x ptr>
 
 define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %masks) {
 ; CHECK-VSCALE-1-LABEL: 'masked_scatter_v4i32'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'masked_scatter_v4i32'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v4i32'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 28 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 
@@ -1191,11 +1191,11 @@ define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %ma
 
 define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %masks) {
 ; CHECK-VSCALE-1-LABEL: 'masked_scatter_v1i128'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'masked_scatter_v1i128'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v1i128'


        


More information about the llvm-commits mailing list