[llvm] faca9fd - [AArch64] Regenerate CostModel tests with update_analyze_test_checks. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 17 02:23:31 PDT 2023


Author: David Green
Date: 2023-07-17T10:23:27+01:00
New Revision: faca9fdc4f79c24a51c585827de4c2d836c788fe

URL: https://github.com/llvm/llvm-project/commit/faca9fdc4f79c24a51c585827de4c2d836c788fe
DIFF: https://github.com/llvm/llvm-project/commit/faca9fdc4f79c24a51c585827de4c2d836c788fe.diff

LOG: [AArch64] Regenerate CostModel tests with update_analyze_test_checks. NFC

Added: 
    

Modified: 
    llvm/test/Analysis/CostModel/AArch64/bswap.ll
    llvm/test/Analysis/CostModel/AArch64/ctpop.ll
    llvm/test/Analysis/CostModel/AArch64/div_cte.ll
    llvm/test/Analysis/CostModel/AArch64/gep.ll
    llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
    llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
    llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/bswap.ll b/llvm/test/Analysis/CostModel/AArch64/bswap.ll
index a3596e09060745..aaffb473c245ce 100644
--- a/llvm/test/Analysis/CostModel/AArch64/bswap.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/bswap.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64--linux-gnu < %s | FileCheck %s
 
 ; Verify the cost of bswap instructions.
@@ -16,57 +17,73 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
 
 define i16 @bswap_i16(i16 %a) {
-; CHECK: function 'bswap_i16'
-; CHECK: Found an estimated cost of 1 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bswap = tail call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+;
   %bswap = tail call i16 @llvm.bswap.i16(i16 %a)
   ret i16 %bswap
 }
 
 define i32 @bswap_i32(i32 %a) {
-; CHECK: function 'bswap_i32'
-; CHECK: Found an estimated cost of 1 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bswap = tail call i32 @llvm.bswap.i32(i32 %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+;
   %bswap = tail call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %bswap
 }
 
 define i64 @bswap_i64(i64 %a) {
-; CHECK: function 'bswap_i64'
-; CHECK: Found an estimated cost of 1 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bswap = tail call i64 @llvm.bswap.i64(i64 %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
   %bswap = tail call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %bswap
 }
 
 define <2 x i32> @bswap_v2i32(<2 x i32> %a) {
-; CHECK: function 'bswap_v2i32'
-; CHECK: Found an estimated cost of 8 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_v2i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %bswap
+;
   %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
   ret <2 x i32> %bswap
 }
 
 define <4 x i16> @bswap_v4i16(<4 x i16> %a) {
-; CHECK: function 'bswap_v4i16'
-; CHECK: Found an estimated cost of 22 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_v4i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %bswap = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %bswap
+;
   %bswap = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a)
   ret <4 x i16> %bswap
 }
 
 define <2 x i64> @bswap_v2i64(<2 x i64> %a) {
-; CHECK: function 'bswap_v2i64'
-; CHECK: Found an estimated cost of 8 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_v2i64'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %bswap
+;
   %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
   ret <2 x i64> %bswap
 }
 
 define <4 x i32> @bswap_v4i32(<4 x i32> %a) {
-; CHECK: function 'bswap_v4i32'
-; CHECK: Found an estimated cost of 22 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_v4i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %bswap
+;
   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a)
   ret <4 x i32> %bswap
 }
 
 define <8 x i16> @bswap_v8i16(<8 x i16> %a) {
-; CHECK: function 'bswap_v8i16'
-; CHECK: Found an estimated cost of 50 for instruction:   %bswap
+; CHECK-LABEL: 'bswap_v8i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %bswap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %bswap
+;
   %bswap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
   ret <8 x i16> %bswap
 }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/ctpop.ll b/llvm/test/Analysis/CostModel/AArch64/ctpop.ll
index 03e719fa7eeee6..ba1033076e372e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/ctpop.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/ctpop.ll
@@ -9,6 +9,7 @@ define i64 @test_ctpop_i64(i64 %a) {
 ; CHECK-LABEL: 'test_ctpop_i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctpop
+;
   %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %ctpop
 }
@@ -17,6 +18,7 @@ define i32 @test_ctpop_i32(i32 %a) {
 ; CHECK-LABEL: 'test_ctpop_i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %ctpop
+;
   %ctpop = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %ctpop
 }
@@ -25,6 +27,7 @@ define i16 @test_ctpop_i16(i16 %a) {
 ; CHECK-LABEL: 'test_ctpop_i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %ctpop
+;
   %ctpop = call i16 @llvm.ctpop.i16(i16 %a)
   ret i16 %ctpop
 }
@@ -33,6 +36,7 @@ define i8 @test_ctpop_i8(i8 %a) {
 ; CHECK-LABEL: 'test_ctpop_i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %ctpop
+;
   %ctpop = call i8 @llvm.ctpop.i8(i8 %a)
   ret i8 %ctpop
 }
@@ -48,6 +52,7 @@ define <2 x i64> @test_ctpop_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'test_ctpop_v2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
+;
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   ret <2 x i64> %ctpop
 }
@@ -56,6 +61,7 @@ define <2 x i32> @test_ctpop_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: 'test_ctpop_v2i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %ctpop
+;
   %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
   ret <2 x i32> %ctpop
 }
@@ -64,6 +70,7 @@ define <4 x i32> @test_ctpop_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'test_ctpop_v4i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %ctpop
+;
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
   ret <4 x i32> %ctpop
 }
@@ -72,6 +79,7 @@ define <2 x i16> @test_ctpop_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: 'test_ctpop_v2i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i16> %ctpop
+;
   %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %a)
   ret <2 x i16> %ctpop
 }
@@ -80,6 +88,7 @@ define <4 x i16> @test_ctpop_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: 'test_ctpop_v4i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %ctpop
+;
   %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %a)
   ret <4 x i16> %ctpop
 }
@@ -88,6 +97,7 @@ define <8 x i16> @test_ctpop_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'test_ctpop_v8i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %ctpop
+;
   %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
   ret <8 x i16> %ctpop
 }
@@ -96,6 +106,7 @@ define <2 x i8> @test_ctpop_v2i8(<2 x i8> %a) {
 ; CHECK-LABEL: 'test_ctpop_v2i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i8> %ctpop
+;
   %ctpop = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %a)
   ret <2 x i8> %ctpop
 }
@@ -104,6 +115,7 @@ define <4 x i8> @test_ctpop_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: 'test_ctpop_v4i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %ctpop
+;
   %ctpop = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %a)
   ret <4 x i8> %ctpop
 }
@@ -112,6 +124,7 @@ define <8 x i8> @test_ctpop_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: 'test_ctpop_v8i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ctpop = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %ctpop
+;
   %ctpop = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
   ret <8 x i8> %ctpop
 }
@@ -120,6 +133,7 @@ define <16 x i8> @test_ctpop_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: 'test_ctpop_v16i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %ctpop
+;
   %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
   ret <16 x i8> %ctpop
 }
@@ -128,6 +142,7 @@ define <4 x i64> @test_ctpop_v4i64(<4 x i64> %a) {
 ; CHECK-LABEL: 'test_ctpop_v4i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %ctpop
+;
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
   ret <4 x i64> %ctpop
 }
@@ -136,6 +151,7 @@ define <8 x i32> @test_ctpop_v8i32(<8 x i32> %a) {
 ; CHECK-LABEL: 'test_ctpop_v8i32'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %ctpop
+;
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
   ret <8 x i32> %ctpop
 }
@@ -144,6 +160,7 @@ define <16 x i16> @test_ctpop_v16i16(<16 x i16> %a) {
 ; CHECK-LABEL: 'test_ctpop_v16i16'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %ctpop
+;
   %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
   ret <16 x i16> %ctpop
 }
@@ -152,6 +169,7 @@ define <32 x i8> @test_ctpop_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: 'test_ctpop_v32i8'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %ctpop
+;
   %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
   ret <32 x i8> %ctpop
 }
@@ -160,6 +178,7 @@ define i64 @test_ctpop_noneon_i64(i64 %a) "target-features"="-fp-armv8,-neon" {
 ; CHECK-LABEL: 'test_ctpop_noneon_i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %ctpop
+;
   %ctpop = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %ctpop
 }
@@ -168,6 +187,7 @@ define <2 x i64> @test_ctpop_noneon_v2i64(<2 x i64> %a) "target-features"="-fp-a
 ; CHECK-LABEL: 'test_ctpop_noneon_v2i64'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %ctpop
+;
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   ret <2 x i64> %ctpop
 }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
index cf56e4d92b31e3..dfed023f0119d4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 ; Verify the cost of integer division by constant.
@@ -5,43 +6,55 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
-; CHECK-LABEL: function 'sdiv8xi16'
-; CHECK: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-LABEL: 'sdiv8xi16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
+;
   %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
   ret <16 x i8> %div
 }
 
 define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
-; CHECK-LABEL: function 'sdiv16xi8'
-; CHECK: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-LABEL: 'sdiv16xi8'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
+;
   %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
   ret <8 x i16> %div
 }
 
 define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
-; CHECK-LABEL: function 'sdiv32xi4'
-; CHECK: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-LABEL: 'sdiv32xi4'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
+;
   %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
   ret <4 x i32> %div
 }
 
 define <16 x i8> @udiv8xi16(<16 x i8> %x) {
-; CHECK-LABEL: function 'udiv8xi16'
-; CHECK: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-LABEL: 'udiv8xi16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
+;
   %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
   ret <16 x i8> %div
 }
 
 define <8 x i16> @udiv16xi8(<8 x i16> %x) {
-; CHECK-LABEL: function 'udiv16xi8'
-; CHECK: Found an estimated cost of 7 for instruction:   %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-LABEL: 'udiv16xi8'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
+;
   %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
   ret <8 x i16> %div
 }
 
 define <4 x i32> @udiv32xi4(<4 x i32> %x) {
-; CHECK-LABEL: function 'udiv32xi4'
-; CHECK: Found an estimated cost of 7 for instruction:   %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-LABEL: 'udiv32xi4'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
+;
   %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
   ret <4 x i32> %div
 }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/gep.ll b/llvm/test/Analysis/CostModel/AArch64/gep.ll
index 662a0b216a857e..34c53cba173b87 100644
--- a/llvm/test/Analysis/CostModel/AArch64/gep.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/gep.ll
@@ -1,291 +1,400 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64--linux-gnu < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnu"
 
 define i8 @test1(ptr %p) {
-; CHECK-LABEL: test1
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test1'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 1
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test2(ptr %p) {
-; CHECK-LABEL: test2
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test2'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 1
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test3(ptr %p) {
-; CHECK-LABEL: test3
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test3'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 1
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test4(ptr %p) {
-; CHECK-LABEL: test4
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test4'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 1
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test5(ptr %p) {
-; CHECK-LABEL: test5
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test5'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 1024
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test6(ptr %p) {
-; CHECK-LABEL: test6
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test6'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 1024
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test7(ptr %p) {
-; CHECK-LABEL: test7
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test7'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 1024
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test8(ptr %p) {
-; CHECK-LABEL: test8
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test8'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 1024
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test9(ptr %p) {
-; CHECK-LABEL: test9
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test9'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 4096
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 4096
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test10(ptr %p) {
-; CHECK-LABEL: test10
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test10'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 4096
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 4096
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test11(ptr %p) {
-; CHECK-LABEL: test11
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test11'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 4096
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 4096
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test12(ptr %p) {
-; CHECK-LABEL: test12
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test12'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 4096
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 4096
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test13(ptr %p) {
-; CHECK-LABEL: test13
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test13'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 -64
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test14(ptr %p) {
-; CHECK-LABEL: test14
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test14'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 -64
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test15(ptr %p) {
-; CHECK-LABEL: test15
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test15'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 -64
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test16(ptr %p) {
-; CHECK-LABEL: test16
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 -64
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test17(ptr %p) {
-; CHECK-LABEL: test17
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test17'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 -1024
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test18(ptr %p) {
-; CHECK-LABEL: test18
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test18'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 -1024
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test19(ptr %p) {
-; CHECK-LABEL: test19
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test19'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 -1024
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test20(ptr %p) {
-; CHECK-LABEL: test20
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test20'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -1024
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 -1024
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test21(ptr %p, i32 %i) {
-; CHECK-LABEL: test21
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test21'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 %i
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 %i
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test22(ptr %p, i32 %i) {
-; CHECK-LABEL: test22
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test22'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 %i
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 %i
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test23(ptr %p, i32 %i) {
-; CHECK-LABEL: test23
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test23'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 %i
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 %i
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test24(ptr %p, i32 %i) {
-; CHECK-LABEL: test24
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test24'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 %i
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 %i
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test25(ptr %p) {
-; CHECK-LABEL: test25
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test25'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 -128
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test26(ptr %p) {
-; CHECK-LABEL: test26
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test26'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 -128
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test27(ptr %p) {
-; CHECK-LABEL: test27
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test27'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 -128
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test28(ptr %p) {
-; CHECK-LABEL: test28
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test28'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -128
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 -128
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test29(ptr %p) {
-; CHECK-LABEL: test29
-; CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test29'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -256
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 -256
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test30(ptr %p) {
-; CHECK-LABEL: test30
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test30'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -256
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 -256
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test31(ptr %p) {
-; CHECK-LABEL: test31
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test31'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -256
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 -256
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test32(ptr %p) {
-; CHECK-LABEL: test32
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -256
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 -256
   %v = load i64, ptr %a
   ret i64 %v
 }
 
 define i8 @test33(ptr %p) {
-; CHECK-LABEL: test33
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, ptr
+; CHECK-LABEL: 'test33'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i8, ptr %p, i32 -512
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i8, ptr %a, align 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %v
+;
   %a = getelementptr inbounds i8, ptr %p, i32 -512
   %v = load i8, ptr %a
   ret i8 %v
 }
 
 define i16 @test34(ptr %p) {
-; CHECK-LABEL: test34
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, ptr
+; CHECK-LABEL: 'test34'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i16, ptr %p, i32 -512
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i16, ptr %a, align 2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %v
+;
   %a = getelementptr inbounds i16, ptr %p, i32 -512
   %v = load i16, ptr %a
   ret i16 %v
 }
 
 define i32 @test35(ptr %p) {
-; CHECK-LABEL: test35
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, ptr
+; CHECK-LABEL: 'test35'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i32, ptr %p, i32 -512
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i32, ptr %a, align 4
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %v
+;
   %a = getelementptr inbounds i32, ptr %p, i32 -512
   %v = load i32, ptr %a
   ret i32 %v
 }
 
 define i64 @test36(ptr %p) {
-; CHECK-LABEL: test36
-; CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, ptr
+; CHECK-LABEL: 'test36'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a = getelementptr inbounds i64, ptr %p, i32 -512
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v = load i64, ptr %a, align 8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %v
+;
   %a = getelementptr inbounds i64, ptr %p, i32 -512
   %v = load i64, ptr %a
   ret i64 %v

diff  --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
index 35cd95d1debe4e..92ecfe8c436473 100644
--- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll
@@ -109,55 +109,57 @@ entry:
 
 define void @scalable_ext_loads() {
 ; CHECK-LABEL: 'scalable_ext_loads'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %zext.nxv16i8to32 = zext <vscale x 16 x i8> %load.nxv16i8.2 to <vscale x 16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction:   %zext.nxv16i8to64 = zext <vscale x 16 x i8> %load.nxv16i8.3 to <vscale x 16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %zext.nxv8i8to16 = zext <vscale x 8 x i8> %load.nxv8i8 to <vscale x 8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %zext.nxv4i8to32 = zext <vscale x 4 x i8> %load.nxv4i8 to <vscale x 4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %zext.nxv2i8to64 = zext <vscale x 2 x i8> %load.nxv2i8 to <vscale x 2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %zext.nxv8i16to64 = zext <vscale x 8 x i16> %load.nxv8i16.2 to <vscale x 8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %zext.nxv4i16to32 = zext <vscale x 4 x i16> %load.nxv4i16 to <vscale x 4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %zext.nxv2i16to64 = zext <vscale x 2 x i16> %load.nxv2i16 to <vscale x 2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %zext.nxv2i32to64 = zext <vscale x 2 x i32> %load.nxv2i32 to <vscale x 2 x i64>
-
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %sext.nxv16i8to32 = sext <vscale x 16 x i8> %load2.nxv16i8.2 to <vscale x 16 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction:   %sext.nxv16i8to64 = sext <vscale x 16 x i8> %load2.nxv16i8.3 to <vscale x 16 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %sext.nxv8i8to16 = sext <vscale x 8 x i8> %load2.nxv8i8 to <vscale x 8 x i16>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %sext.nxv4i8to32 = sext <vscale x 4 x i8> %load2.nxv4i8 to <vscale x 4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %sext.nxv2i8to64 = sext <vscale x 2 x i8> %load2.nxv2i8 to <vscale x 2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %sext.nxv8i16to64 = sext <vscale x 8 x i16> %load2.nxv8i16.2 to <vscale x 8 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %sext.nxv4i16to32 = sext <vscale x 4 x i16> %load2.nxv4i16 to <vscale x 4 x i32>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %sext.nxv2i16to64 = sext <vscale x 2 x i16> %load2.nxv2i16 to <vscale x 2 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64>
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %load2.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   %sext.nxv2i32to64 = sext <vscale x 2 x i32> %load2.nxv2i32 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zext.nxv16i8to32 = zext <vscale x 16 x i8> %load.nxv16i8.2 to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %zext.nxv16i8to64 = zext <vscale x 16 x i8> %load.nxv16i8.3 to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv8i8to16 = zext <vscale x 8 x i8> %load.nxv8i8 to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv4i8to32 = zext <vscale x 4 x i8> %load.nxv4i8 to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i8to64 = zext <vscale x 2 x i8> %load.nxv2i8 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv8i16to32 = zext <vscale x 8 x i16> %load.nxv8i16 to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %zext.nxv8i16to64 = zext <vscale x 8 x i16> %load.nxv8i16.2 to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv4i16to32 = zext <vscale x 4 x i16> %load.nxv4i16 to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i16to64 = zext <vscale x 2 x i16> %load.nxv2i16 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %zext.nxv4i32to64 = zext <vscale x 4 x i32> %load.nxv4i32 to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext.nxv2i32to64 = zext <vscale x 2 x i32> %load.nxv2i32 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv16i8to16 = sext <vscale x 16 x i8> %load2.nxv16i8 to <vscale x 16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8.2 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sext.nxv16i8to32 = sext <vscale x 16 x i8> %load2.nxv16i8.2 to <vscale x 16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv16i8.3 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %sext.nxv16i8to64 = sext <vscale x 16 x i8> %load2.nxv16i8.3 to <vscale x 16 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i8 = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv8i8to16 = sext <vscale x 8 x i8> %load2.nxv8i8 to <vscale x 8 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i8 = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv4i8to32 = sext <vscale x 4 x i8> %load2.nxv4i8 to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i8 = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i8to64 = sext <vscale x 2 x i8> %load2.nxv2i8 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i16 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv8i16to32 = sext <vscale x 8 x i16> %load2.nxv8i16 to <vscale x 8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv8i16.2 = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %sext.nxv8i16to64 = sext <vscale x 8 x i16> %load2.nxv8i16.2 to <vscale x 8 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i16 = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv4i16to32 = sext <vscale x 4 x i16> %load2.nxv4i16 to <vscale x 4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i16 = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i16to64 = sext <vscale x 2 x i16> %load2.nxv2i16 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sext.nxv4i32to64 = sext <vscale x 4 x i32> %load2.nxv4i32 to <vscale x 4 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %load2.nxv2i32 = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext.nxv2i32to64 = sext <vscale x 2 x i32> %load2.nxv2i32 to <vscale x 2 x i64>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+
 
   %load.nxv16i8 = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 8, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
   %zext.nxv16i8to16 = zext <vscale x 16 x i8> %load.nxv16i8 to <vscale x 16 x i16>

diff  --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
index e75c527f857ecf..ab66d93bb8995a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
 ; Check memory cost model action for fixed vector SVE and Neon
 ; Vector bits size lower than 256 bits end up assuming Neon cost model
 ; CHECK-NEON has same performance as CHECK-SVE-128
@@ -11,80 +12,176 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define <16 x i8> @load16(ptr %ptr) {
 ; CHECK: function 'load16'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'load16'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <16 x i8>, ptr %ptr, align 16
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %out
+;
+; CHECK-SVE-128-LABEL: 'load16'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <16 x i8>, ptr %ptr, align 16
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %out
+;
+; CHECK-SVE-256-LABEL: 'load16'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <16 x i8>, ptr %ptr, align 16
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %out
+;
+; CHECK-SVE-512-LABEL: 'load16'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <16 x i8>, ptr %ptr, align 16
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %out
+;
   %out = load <16 x i8>, ptr %ptr
   ret <16 x i8> %out
 }
 
 define void @store16(ptr %ptr, <16 x i8> %val) {
 ; CHECK: function 'store16'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'store16'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %val, ptr %ptr, align 16
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'store16'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %val, ptr %ptr, align 16
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'store16'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %val, ptr %ptr, align 16
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'store16'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %val, ptr %ptr, align 16
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   store <16 x i8> %val, ptr %ptr
   ret void
 }
 
 define <8 x i8> @load8(ptr %ptr) {
 ; CHECK: function 'load8'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'load8'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <8 x i8>, ptr %ptr, align 8
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %out
+;
+; CHECK-SVE-128-LABEL: 'load8'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <8 x i8>, ptr %ptr, align 8
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %out
+;
+; CHECK-SVE-256-LABEL: 'load8'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <8 x i8>, ptr %ptr, align 8
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %out
+;
+; CHECK-SVE-512-LABEL: 'load8'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <8 x i8>, ptr %ptr, align 8
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %out
+;
   %out = load <8 x i8>, ptr %ptr
   ret <8 x i8> %out
 }
 
 define void @store8(ptr %ptr, <8 x i8> %val) {
 ; CHECK: function 'store8'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'store8'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %val, ptr %ptr, align 8
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'store8'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %val, ptr %ptr, align 8
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'store8'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %val, ptr %ptr, align 8
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'store8'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %val, ptr %ptr, align 8
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   store <8 x i8> %val, ptr %ptr
   ret void
 }
 
 define <4 x i8> @load4(ptr %ptr) {
 ; CHECK: function 'load4'
-; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'load4'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load <4 x i8>, ptr %ptr, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %out
+;
+; CHECK-SVE-128-LABEL: 'load4'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load <4 x i8>, ptr %ptr, align 4
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %out
+;
+; CHECK-SVE-256-LABEL: 'load4'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <4 x i8>, ptr %ptr, align 4
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %out
+;
+; CHECK-SVE-512-LABEL: 'load4'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <4 x i8>, ptr %ptr, align 4
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %out
+;
   %out = load <4 x i8>, ptr %ptr
   ret <4 x i8> %out
 }
 
 define void @store4(ptr %ptr, <4 x i8> %val) {
 ; CHECK: function 'store4'
-; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'store4'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %val, ptr %ptr, align 4
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'store4'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %val, ptr %ptr, align 4
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'store4'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %val, ptr %ptr, align 4
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'store4'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %val, ptr %ptr, align 4
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   store <4 x i8> %val, ptr %ptr
   ret void
 }
 
 define <16 x i16> @load_256(ptr %ptr) {
 ; CHECK: function 'load_256'
-; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'load_256'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load <16 x i16>, ptr %ptr, align 32
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %out
+;
+; CHECK-SVE-128-LABEL: 'load_256'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load <16 x i16>, ptr %ptr, align 32
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %out
+;
+; CHECK-SVE-256-LABEL: 'load_256'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <16 x i16>, ptr %ptr, align 32
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %out
+;
+; CHECK-SVE-512-LABEL: 'load_256'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <16 x i16>, ptr %ptr, align 32
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %out
+;
   %out = load <16 x i16>, ptr %ptr
   ret <16 x i16> %out
 }
 
 define <8 x i64> @load_512(ptr %ptr) {
 ; CHECK: function 'load_512'
-; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEON-LABEL: 'load_512'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %out = load <8 x i64>, ptr %ptr, align 64
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %out
+;
+; CHECK-SVE-128-LABEL: 'load_512'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %out = load <8 x i64>, ptr %ptr, align 64
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %out
+;
+; CHECK-SVE-256-LABEL: 'load_512'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %out = load <8 x i64>, ptr %ptr, align 64
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %out
+;
+; CHECK-SVE-512-LABEL: 'load_512'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %out = load <8 x i64>, ptr %ptr, align 64
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %out
+;
   %out = load <8 x i64>, ptr %ptr
   ret <8 x i64> %out
 }
@@ -92,10 +189,21 @@ define <8 x i64> @load_512(ptr %ptr) {
 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i8>)
 define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
 ; CHECK:         gather_load_4xi8_constant_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
+;
+; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
+;
+; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
+;
+; CHECK-SVE-512-LABEL: 'gather_load_4xi8_constant_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
 ;
   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
   ret <4 x i8> %lv
@@ -103,10 +211,21 @@ define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
 
 define <4 x i8> @gather_load_4xi8_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         gather_load_4xi8_variable_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0
+; CHECK-NEON-LABEL: 'gather_load_4xi8_variable_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
+;
+; CHECK-SVE-128-LABEL: 'gather_load_4xi8_variable_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
+;
+; CHECK-SVE-256-LABEL: 'gather_load_4xi8_variable_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
+;
+; CHECK-SVE-512-LABEL: 'gather_load_4xi8_variable_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
 ;
   %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
   ret <4 x i8> %lv
@@ -115,10 +234,21 @@ define <4 x i8> @gather_load_4xi8_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond)
 declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4 x i1>)
 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
 ; CHECK:         scatter_store_4xi8_constant_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'scatter_store_4xi8_constant_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
   ret void
@@ -126,10 +256,21 @@ define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
 
 define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         scatter_store_4xi8_variable_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i8.v4p0(
+; CHECK-NEON-LABEL: 'scatter_store_4xi8_variable_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_variable_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_variable_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'scatter_store_4xi8_variable_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
   ret void
@@ -138,10 +279,21 @@ define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x ptr> %ptrs, <4
 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x i32>)
 define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
 ; CHECK:         gather_load_4xi32_constant_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
+;
+; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
+;
+; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
+;
+; CHECK-SVE-512-LABEL: 'gather_load_4xi32_constant_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
 ;
   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
   ret <4 x i32> %lv
@@ -149,10 +301,21 @@ define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
 
 define <4 x i32> @gather_load_4xi32_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         gather_load_4xi32_variable_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0
+; CHECK-NEON-LABEL: 'gather_load_4xi32_variable_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
+;
+; CHECK-SVE-128-LABEL: 'gather_load_4xi32_variable_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
+;
+; CHECK-SVE-256-LABEL: 'gather_load_4xi32_variable_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
+;
+; CHECK-SVE-512-LABEL: 'gather_load_4xi32_variable_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
 ;
   %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
   ret <4 x i32> %lv
@@ -161,10 +324,21 @@ define <4 x i32> @gather_load_4xi32_variable_mask(<4 x ptr> %ptrs, <4 x i1> %con
 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <4 x i1>)
 define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) {
 ; CHECK:         scatter_store_4xi32_constant_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'scatter_store_4xi32_constant_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
   ret void
@@ -172,10 +346,21 @@ define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs)
 
 define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %cond) {
 ; CHECK:         scatter_store_4xi32_variable_mask
-; CHECK-NEON:    Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 40 for instruction:  call void @llvm.masked.scatter.v4i32.v4p0(
+; CHECK-NEON-LABEL: 'scatter_store_4xi32_variable_mask'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_variable_mask'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_variable_mask'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'scatter_store_4xi32_variable_mask'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond)
   ret void
@@ -184,10 +369,22 @@ define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x ptr> %ptrs,
 declare <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr>, i32, <256 x i1>, <256 x i16>)
 define void @sve_gather_vls(<256 x i1> %v256i1mask) {
 ; CHECK-LABEL: 'sve_scatter_vls'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-NEON-LABEL: 'sve_gather_vls'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'sve_gather_vls'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 1952 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'sve_gather_vls'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'sve_gather_vls'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
 entry:
   %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer)
   ret void
@@ -196,10 +393,22 @@ entry:
 declare <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr>, i32, <256 x i1>, <256 x float>)
 define void @sve_gather_vls_float(<256 x i1> %v256i1mask) {
 ; CHECK-LABEL: 'sve_gather_vls_float'
-; CHECK-NEON: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-NEON-LABEL: 'sve_gather_vls_float'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'sve_gather_vls_float'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 1856 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'sve_gather_vls_float'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'sve_gather_vls_float'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
 entry:
   %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer)
   ret void
@@ -208,10 +417,22 @@ entry:
 declare void @llvm.masked.scatter.v256i8.v256p0(<256 x i8>, <256 x ptr>, i32, <256 x i1>)
 define void @sve_scatter_vls(<256 x i1> %v256i1mask){
 ; CHECK-LABEL: 'sve_scatter_vls'
-; CHECK-NEON: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-NEON-LABEL: 'sve_scatter_vls'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'sve_scatter_vls'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 2000 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'sve_scatter_vls'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'sve_scatter_vls'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 2560 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
 entry:
   call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask)
   ret void
@@ -220,10 +441,22 @@ entry:
 declare void @llvm.masked.scatter.v512f16.v512p0(<512 x half>, <512 x ptr>, i32, <512 x i1>)
 define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){
 ; CHECK-LABEL: 'sve_scatter_vls_float'
-; CHECK-NEON: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
-; CHECK-SVE-128: Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
-; CHECK-SVE-256: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
-; CHECK-SVE-512: Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-NEON-LABEL: 'sve_scatter_vls_float'
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-128-LABEL: 'sve_scatter_vls_float'
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 3904 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-256-LABEL: 'sve_scatter_vls_float'
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; CHECK-SVE-512-LABEL: 'sve_scatter_vls_float'
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 5120 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask)
   ret void
 }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll b/llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
index 8c139577500ec0..cf208608c32009 100644
--- a/llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64--linux-gnu -mattr=+neon  < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -5,10 +6,12 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; Check icmp for legal integer vectors.
 define void @stepvector_legal_int() {
 ; CHECK-LABEL: 'stepvector_legal_int'
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
-; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
   %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
   %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
@@ -19,8 +22,10 @@ define void @stepvector_legal_int() {
 ; Check icmp for an illegal integer vector.
 define void @stepvector_illegal_int() {
 ; CHECK-LABEL: 'stepvector_illegal_int'
-; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
   %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
   %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
   ret void


        


More information about the llvm-commits mailing list