[llvm] 052225d - [AArch64] Use a lower Costsize cost in getScalarizationOverhead.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 11 12:18:30 PDT 2025
Author: David Green
Date: 2025-04-11T20:18:26+01:00
New Revision: 052225dc0366ddf56631b1e4104ef09896f1139b
URL: https://github.com/llvm/llvm-project/commit/052225dc0366ddf56631b1e4104ef09896f1139b
DIFF: https://github.com/llvm/llvm-project/commit/052225dc0366ddf56631b1e4104ef09896f1139b.diff
LOG: [AArch64] Use a lower Costsize cost in getScalarizationOverhead.
This is a follow on to #130946 to use the same codesize cost override in
getScalarizationOverhead for vector instructions.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/bswap.ll
llvm/test/Analysis/CostModel/AArch64/ctlz.ll
llvm/test/Analysis/CostModel/AArch64/cttz.ll
llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1d8277182add..ca1a486901951 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3832,8 +3832,9 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead(
if (Ty->getElementType()->isFloatingPointTy())
return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
CostKind);
- return DemandedElts.popcount() * (Insert + Extract) *
- ST->getVectorInsertExtractBaseCost();
+ unsigned VecInstCost =
+ CostKind == TTI::TCK_CodeSize ? 1 : ST->getVectorInsertExtractBaseCost();
+ return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
}
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
diff --git a/llvm/test/Analysis/CostModel/AArch64/bswap.ll b/llvm/test/Analysis/CostModel/AArch64/bswap.ll
index 8dad1f218577a..2df508ebe40bc 100644
--- a/llvm/test/Analysis/CostModel/AArch64/bswap.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/bswap.ll
@@ -44,7 +44,7 @@ define void @neon() {
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4i64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v3i32 = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> undef)
-; CHECK-NEXT: Cost Model: Found costs of 12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%v4i16 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/ctlz.ll b/llvm/test/Analysis/CostModel/AArch64/ctlz.ll
index 4ad359d4d2c68..34e7f87028477 100644
--- a/llvm/test/Analysis/CostModel/AArch64/ctlz.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/ctlz.ll
@@ -55,7 +55,7 @@ declare i8 @llvm.ctlz.i8(i8)
define <2 x i64> @test_ctlz_v2i64(<2 x i64> %a) {
;
; CHECK-LABEL: 'test_ctlz_v2i64'
-; CHECK-NEXT: Cost Model: Found costs of 10 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
;
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
@@ -154,7 +154,7 @@ define <16 x i8> @test_ctlz_v16i8(<16 x i8> %a) {
define <4 x i64> @test_ctlz_v4i64(<4 x i64> %a) {
; CHECK-LABEL: 'test_ctlz_v4i64'
-; CHECK-NEXT: Cost Model: Found costs of 20 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
;
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz.ll b/llvm/test/Analysis/CostModel/AArch64/cttz.ll
index 021eb73234f31..6f7810372388a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cttz.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cttz.ll
@@ -55,7 +55,7 @@ declare i8 @llvm.cttz.i8(i8)
define <2 x i64> @test_cttz_v2i64(<2 x i64> %a) {
;
; CHECK-LABEL: 'test_cttz_v2i64'
-; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
;
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
@@ -65,7 +65,7 @@ define <2 x i64> @test_cttz_v2i64(<2 x i64> %a) {
define <2 x i32> @test_cttz_v2i32(<2 x i32> %a) {
;
; CHECK-LABEL: 'test_cttz_v2i32'
-; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %cttz
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
@@ -75,7 +75,7 @@ define <2 x i32> @test_cttz_v2i32(<2 x i32> %a) {
define <4 x i32> @test_cttz_v4i32(<4 x i32> %a) {
;
; CHECK-LABEL: 'test_cttz_v4i32'
-; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
;
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
@@ -85,7 +85,7 @@ define <4 x i32> @test_cttz_v4i32(<4 x i32> %a) {
define <2 x i16> @test_cttz_v2i16(<2 x i16> %a) {
;
; CHECK-LABEL: 'test_cttz_v2i16'
-; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %cttz
;
%cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
@@ -95,7 +95,7 @@ define <2 x i16> @test_cttz_v2i16(<2 x i16> %a) {
define <4 x i16> @test_cttz_v4i16(<4 x i16> %a) {
;
; CHECK-LABEL: 'test_cttz_v4i16'
-; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %cttz
;
%cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
@@ -105,7 +105,7 @@ define <4 x i16> @test_cttz_v4i16(<4 x i16> %a) {
define <8 x i16> @test_cttz_v8i16(<8 x i16> %a) {
;
; CHECK-LABEL: 'test_cttz_v8i16'
-; CHECK-NEXT: Cost Model: Found costs of 40 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
;
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
@@ -115,7 +115,7 @@ define <8 x i16> @test_cttz_v8i16(<8 x i16> %a) {
define <2 x i8> @test_cttz_v2i8(<2 x i8> %a) {
;
; CHECK-LABEL: 'test_cttz_v2i8'
-; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %cttz
;
%cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
@@ -125,7 +125,7 @@ define <2 x i8> @test_cttz_v2i8(<2 x i8> %a) {
define <4 x i8> @test_cttz_v4i8(<4 x i8> %a) {
;
; CHECK-LABEL: 'test_cttz_v4i8'
-; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %cttz
;
%cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
@@ -135,7 +135,7 @@ define <4 x i8> @test_cttz_v4i8(<4 x i8> %a) {
define <8 x i8> @test_cttz_v8i8(<8 x i8> %a) {
;
; CHECK-LABEL: 'test_cttz_v8i8'
-; CHECK-NEXT: Cost Model: Found costs of 40 for: %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %cttz
;
%cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
@@ -145,7 +145,7 @@ define <8 x i8> @test_cttz_v8i8(<8 x i8> %a) {
define <16 x i8> @test_cttz_v16i8(<16 x i8> %a) {
;
; CHECK-LABEL: 'test_cttz_v16i8'
-; CHECK-NEXT: Cost Model: Found costs of 80 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:48 Lat:80 SizeLat:80 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
;
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
@@ -155,7 +155,7 @@ define <16 x i8> @test_cttz_v16i8(<16 x i8> %a) {
define <4 x i64> @test_cttz_v4i64(<4 x i64> %a) {
;
; CHECK-LABEL: 'test_cttz_v4i64'
-; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
;
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
@@ -165,7 +165,7 @@ define <4 x i64> @test_cttz_v4i64(<4 x i64> %a) {
define <8 x i32> @test_cttz_v8i32(<8 x i32> %a) {
;
; CHECK-LABEL: 'test_cttz_v8i32'
-; CHECK-NEXT: Cost Model: Found costs of 40 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
;
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
@@ -175,7 +175,7 @@ define <8 x i32> @test_cttz_v8i32(<8 x i32> %a) {
define <16 x i16> @test_cttz_v16i16(<16 x i16> %a) {
;
; CHECK-LABEL: 'test_cttz_v16i16'
-; CHECK-NEXT: Cost Model: Found costs of 80 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:48 Lat:80 SizeLat:80 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
;
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
@@ -185,7 +185,7 @@ define <16 x i16> @test_cttz_v16i16(<16 x i16> %a) {
define <32 x i8> @test_cttz_v32i8(<32 x i8> %a) {
;
; CHECK-LABEL: 'test_cttz_v32i8'
-; CHECK-NEXT: Cost Model: Found costs of 160 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
+; CHECK-NEXT: Cost Model: Found costs of RThru:160 CodeSize:96 Lat:160 SizeLat:160 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
;
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
index 5a1ed4e7e07ae..56ae1ac86c825 100644
--- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
@@ -5,24 +5,24 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
define void @fixed() {
; CHECK-LABEL: 'fixed'
-; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:39 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:156 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:48 SizeLat:48 for: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:64 Lat:96 SizeLat:96 for: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:48 SizeLat:48 for: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:15 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:31 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:15 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:124 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
index fb54f8de023cd..ae638e5dd366d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -190,11 +190,11 @@ declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>,
define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
; CHECK: gather_load_4xi8_constant_mask
; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
;
; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of 20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
;
; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask'
@@ -212,11 +212,11 @@ define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
define <4 x i8> @gather_load_4xi8_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: gather_load_4xi8_variable_mask
; CHECK-NEON-LABEL: 'gather_load_4xi8_variable_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
;
; CHECK-SVE-128-LABEL: 'gather_load_4xi8_variable_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %lv
;
; CHECK-SVE-256-LABEL: 'gather_load_4xi8_variable_mask'
@@ -235,11 +235,11 @@ declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4
define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
; CHECK: scatter_store_4xi8_constant_mask
; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask'
@@ -257,11 +257,11 @@ define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: scatter_store_4xi8_variable_mask
; CHECK-NEON-LABEL: 'scatter_store_4xi8_variable_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_variable_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_variable_mask'
@@ -280,11 +280,11 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>
define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
; CHECK: gather_load_4xi32_constant_mask
; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
;
; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of 20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
;
; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask'
@@ -302,11 +302,11 @@ define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
define <4 x i32> @gather_load_4xi32_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: gather_load_4xi32_variable_mask
; CHECK-NEON-LABEL: 'gather_load_4xi32_variable_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
;
; CHECK-SVE-128-LABEL: 'gather_load_4xi32_variable_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %lv
;
; CHECK-SVE-256-LABEL: 'gather_load_4xi32_variable_mask'
@@ -325,11 +325,11 @@ declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <
define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) {
; CHECK: scatter_store_4xi32_constant_mask
; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of 20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask'
@@ -347,11 +347,11 @@ define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs)
define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
; CHECK: scatter_store_4xi32_variable_mask
; CHECK-NEON-LABEL: 'scatter_store_4xi32_variable_mask'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_variable_mask'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_variable_mask'
@@ -370,11 +370,11 @@ declare <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr>, i32, <256 x
define void @sve_gather_vls(<256 x i1> %v256i1mask) {
; CHECK-LABEL: 'sve_scatter_vls'
; CHECK-NEON-LABEL: 'sve_gather_vls'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'sve_gather_vls'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'sve_gather_vls'
@@ -394,11 +394,11 @@ declare <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr>, i32, <256
define void @sve_gather_vls_float(<256 x i1> %v256i1mask) {
; CHECK-LABEL: 'sve_gather_vls_float'
; CHECK-NEON-LABEL: 'sve_gather_vls_float'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1664 CodeSize:1728 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1664 CodeSize:1216 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'sve_gather_vls_float'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:1664 CodeSize:1728 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:1664 CodeSize:1216 Lat:1920 SizeLat:1920 for: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'sve_gather_vls_float'
@@ -418,11 +418,11 @@ declare void @llvm.masked.scatter.v256i8.v256p0(<256 x i8>, <256 x ptr>, i32, <2
define void @sve_scatter_vls(<256 x i1> %v256i1mask){
; CHECK-LABEL: 'sve_scatter_vls'
; CHECK-NEON-LABEL: 'sve_scatter_vls'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'sve_scatter_vls'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:2048 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:1792 CodeSize:1280 Lat:2048 SizeLat:2048 for: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'sve_scatter_vls'
@@ -442,11 +442,11 @@ declare void @llvm.masked.scatter.v512f16.v512p0(<512 x half>, <512 x ptr>, i32,
define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){
; CHECK-LABEL: 'sve_scatter_vls_float'
; CHECK-NEON-LABEL: 'sve_scatter_vls_float'
-; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:3456 CodeSize:3520 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:3456 CodeSize:2496 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
; CHECK-NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-128-LABEL: 'sve_scatter_vls_float'
-; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:3456 CodeSize:3520 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:3456 CodeSize:2496 Lat:3968 SizeLat:3968 for: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
; CHECK-SVE-128-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-SVE-256-LABEL: 'sve_scatter_vls_float'
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 308a3785c9f05..1483d476bef0d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1103,15 +1103,15 @@ define <vscale x 8 x i32> @masked_gather_nxv8i32(<vscale x 8 x ptr> %ld, <vscale
define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32> %passthru) {
; CHECK-VSCALE-1-LABEL: 'masked_gather_v4i32'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res
;
; CHECK-VSCALE-2-LABEL: 'masked_gather_v4i32'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res
;
; TYPE_BASED_ONLY-LABEL: 'masked_gather_v4i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 36 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:36 CodeSize:20 Lat:36 SizeLat:36 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res
;
%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru)
@@ -1120,11 +1120,11 @@ define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32>
define <1 x i128> @masked_gather_v1i128(<1 x ptr> %ld, <1 x i1> %masks, <1 x i128> %passthru) {
; CHECK-VSCALE-1-LABEL: 'masked_gather_v1i128'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <1 x i128> %res
;
; CHECK-VSCALE-2-LABEL: 'masked_gather_v1i128'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru)
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <1 x i128> %res
;
; TYPE_BASED_ONLY-LABEL: 'masked_gather_v1i128'
@@ -1173,15 +1173,15 @@ define void @masked_scatter_nxv8i32(<vscale x 8 x i32> %data, <vscale x 8 x ptr>
define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %masks) {
; CHECK-VSCALE-1-LABEL: 'masked_scatter_v4i32'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_scatter_v4i32'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:20 Lat:32 SizeLat:32 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v4i32'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 28 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:28 CodeSize:16 Lat:28 SizeLat:28 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
@@ -1191,11 +1191,11 @@ define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %ma
define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %masks) {
; CHECK-VSCALE-1-LABEL: 'masked_scatter_v1i128'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; CHECK-VSCALE-2-LABEL: 'masked_scatter_v1i128'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:8 SizeLat:9 for: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks)
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v1i128'
More information about the llvm-commits
mailing list