[llvm] f870c55 - [AArch64] NFC: Cleanup some SVE cost-model tests.

Mon Mar 1 05:29:54 PST 2021

Author: Sander de Smalen
Date: 2021-03-01T13:26:31Z
New Revision: f870c551f090b6edc83892efd68e9e96ed5c19a8

URL: https://github.com/llvm/llvm-project/commit/f870c551f090b6edc83892efd68e9e96ed5c19a8
DIFF: https://github.com/llvm/llvm-project/commit/f870c551f090b6edc83892efd68e9e96ed5c19a8.diff

LOG: [AArch64] NFC: Cleanup some SVE cost-model tests.

Moved some of the `sve-getIntrinsicInstrCost-<..>` tests into a single sve-intrinsics.ll
file, and simplified the tests a bit by bundling all the intrinsics in one
function (instead of testing one intrinsic per function). That makes it easier
to see the cost of the intrinsics.

Added: 
    llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
    llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
    llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll

Modified: 
    

Removed: 
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll
    llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
similarity index 100%
rename from llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
rename to llvm/test/Analysis/CostModel/AArch64/sve-gather.ll

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
deleted file mode 100644
index 484aa2a01130..000000000000

--- a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Checks getIntrinsicInstrCost in BasicTTIImpl.h with SVE for CTLZ and CCTZ
-
-; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
-
-; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
-
-; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
-; WARN-NOT: warning
-
-; Check for CTLZ
-
-define void  @ctlz_nxv4i32(<vscale x 4 x i32> %A) {
-; CHECK-LABEL: 'ctlz_nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
-
-  %1 = tail call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
-  ret void
-}
-
-; Check for CCTZ
-
-define void  @cttz_nxv4i32(<vscale x 4 x i32> %A) {
-; CHECK-LABEL: 'cttz_nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
-
-  %1 = tail call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
-  ret void
-}
-
-declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
-declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll
deleted file mode 100644
index 9523e17cb5de..000000000000
--- a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
-
-define <16 x i32> @extract_cost(<vscale x 4 x i32> %vec) {
-; CHECK-LABEL: 'extract_cost'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction:   %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret <16 x i32> %ret
-
-  %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
-  ret <16 x i32> %ret
-}
-
-define <vscale x 4 x i32> @insert_cost(<vscale x 4 x i32> %vec, <16 x i32> %subVec) {
-; CHECK-LABEL: 'insert_cost'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction:   %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret <vscale x 4 x i32> %ret
-
-  %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0)
-  ret <vscale x 4 x i32> %ret
-}
-
-define <vscale x 4 x i32> @extract_cost_scalable(<vscale x 16 x i32> %vec) {
-; CHECK-LABEL: 'extract_cost_scalable'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret <vscale x 4 x i32> %ret
-
-  %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0)
-  ret <vscale x 4 x i32> %ret
-}
-
-define <vscale x 16 x i32> @insert_cost_scalable(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec) {
-; CHECK-LABEL: 'insert_cost_scalable'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret <vscale x 16 x i32> %ret
-
-  %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0)
-  ret <vscale x 16 x i32> %ret
-}
-
-declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64)
-declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64)
-declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64)
-declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll
deleted file mode 100644
index 486e7aaac68a..000000000000
--- a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll
+++ /dev/null
@@ -1,251 +0,0 @@
-; Check getIntrinsicInstrCost in BasicTTIImpl.h with SVE for vector.reduce.<operand>
-; Checks legal and not legal vector size
-
-; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
-
-
-; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
-; WARN-NOT: warning
-
-define i32 @add.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'add.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT:Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @add.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'add.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %r = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT:Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @mul.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'mul.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction:   %r = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @mul.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'mul.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction:   %r = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @and.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'and.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @and.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'and.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %r = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @or.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'or.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @or.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'or.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %r = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @xor.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'xor.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @xor.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'xor.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction:   %r = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @umin.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'umin.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @umin.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'umin.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %r = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define float @fmax.f32.nxv4f32(<vscale x 4 x float> %v) {
-; CHECK-LABEL: 'fmax.f32.nxv4f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret float %r
-
-  %r = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
-  ret float %r
-}
-
-define double @fmax.f64.nxv4f64(<vscale x 4 x double> %v) {
-; CHECK-LABEL: 'fmax.f64.nxv4f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %r = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret double %r
-
-  %r = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
-  ret double %r
-}
-
-define float @fmin.f32.nxv4f32(<vscale x 4 x float> %v) {
-; CHECK-LABEL: 'fmin.f32.nxv4f32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret float %r
-
-  %r = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
-  ret float %r
-}
-
-define double @fmin.f64.nxv4f64(<vscale x 4 x double> %v) {
-; CHECK-LABEL: 'fmin.f64.nxv4f64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %r = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret double %r
-
-  %r = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
-  ret double %r
-}
-
-define i32 @umax.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'umax.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @umax.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'umax.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %r = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @smin.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'smin.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @smin.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'smin.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %r = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define i32 @smax.i32.nxv4i32(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: 'smax.i32.nxv4i32'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %r = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i32 %r
-
-  %r = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
-  ret i32 %r
-}
-
-define i64 @smax.i64.nxv4i64(<vscale x 4 x i64> %v) {
-; CHECK-LABEL: 'smax.i64.nxv4i64'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction:   %r = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret i64 %r
-
-  %r = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
-  ret i64 %r
-}
-
-define float @fadda_nxv4f32(float %start, <vscale x 4 x float> %a) #0 {
-; CHECK-LABEL: 'fadda_nxv4f32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %start, <vscale x 4 x float> %a)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret float %res
-
-  %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %start, <vscale x 4 x float> %a)
-  ret float %res
-}
-
-define double @fadda_nxv4f64(double %start, <vscale x 4 x double> %a) #0 {
-; CHECK-LABEL: 'fadda_nxv4f64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %res = call double @llvm.vector.reduce.fadd.nxv4f64(double %start, <vscale x 4 x double> %a)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret double %res
-
-  %res = call double @llvm.vector.reduce.fadd.nxv4f64(double %start, <vscale x 4 x double> %a)
-  ret double %res
-}
-
-
-declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
-declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
-declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
-declare i32 @llvm.vector.reduce.fmin.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
-declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
-declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
-declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
-declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
-declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
-declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
-declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
-declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll
deleted file mode 100644
index 6de1a54a3fe2..000000000000
--- a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reverse.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; Check getIntrinsicInstrCost in BasicTTIImpl.h for vector.reverse
-
-; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
-
-; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
-
-; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
-; WARN-NOT: warning
-
-define void @vector_reverse() #0 {
-; CHECK-LABEL: 'vector_reverse':
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %2 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %4 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %5 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %6 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %7 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %8 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %9 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %10 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %11 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %12 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %13 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %14 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %15 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:   %16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret void
-
-  call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-  call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
-  call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-  call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-  call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-  call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-  call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-  call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-  call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
-  call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
-  call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
-  call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
-  call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
-  call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
-  call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
-  call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
-  ret void
-}
-
-attributes #0 = { "target-features"="+sve,+bf16" }
-
-declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
-declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>)
-declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
-declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>)
-declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
-declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
-declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>)
-declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
-declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>)
-declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
-declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>)
-declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
-declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>)
-declare <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat>)
-declare <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat>)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
new file mode 100644
index 000000000000..6653c394d7fd
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -0,0 +1,171 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+
+define void @vector_insert_extract(<vscale x 4 x i32> %v0, <vscale x 16 x i32> %v1, <16 x i32> %v2) { ; costs of experimental.vector.{extract,insert} for fixed/scalable and scalable/scalable type pairs, all at index 0
+; CHECK-LABEL: 'vector_insert_extract'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0)
+  %extract_fixed_from_scalable = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %v0, i64 0)
+  %insert_fixed_into_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %v0, <16 x i32> %v2, i64 0)
+  %extract_scalable_from_scalable = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %v1, i64 0)
+  %insert_scalable_into_scalable = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %v1, <vscale x 4 x i32> %v0, i64 0)
+  ret void
+}
+declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64)
+declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64)
+declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64)
+
+
+define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) { ; costs of llvm.vector.reduce.* on nxv4 types: the integer group first, then fadd/fmin/fmax
+; CHECK-LABEL: 'reductions'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
+  %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
+  %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
+  %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
+  %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
+  %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
+  %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
+  %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
+  %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
+  %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
+  %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
+  %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
+  %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
+  %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
+  %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
+  %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
+  %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
+  %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
+  %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
+
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
+  %fadd_nxv4f32 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2)
+  %fadd_nxv4f64 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3)
+  %fmin_nxv4f32 = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
+  %fmin_nxv4f64 = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
+  %fmax_nxv4f32 = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
+  %fmax_nxv4f64 = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
+
+  ret void
+}
+declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
+declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
+declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
+declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
+declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
+declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
+declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
+declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
+declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
+
+
+define void  @count_zeroes(<vscale x 4 x i32> %A) { ; costs of llvm.ctlz / llvm.cttz on nxv4i32, both called with the i1 operand set to true
+; CHECK-LABEL: 'count_zeroes'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  %ctlz = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  %cttz = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %A, i1 true)
+  ret void
+}
+declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
+
+
+define void @vector_reverse() #0 { ; costs of experimental.vector.reverse; attribute #0 enables +sve,+bf16 while the opt invocation only passes +sve
+; CHECK-LABEL: 'vector_reverse'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
+  %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
+  %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
+  %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
+  %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
+  %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
+  %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
+  %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
+  %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
+  %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
+  %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
+  %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
+  %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
+  %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
+  %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
+  %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
+  %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
+  ret void
+}
+declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 32 x i8> @llvm.experimental.vector.reverse.nxv32i8(<vscale x 32 x i8>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
+declare <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
+declare <vscale x 4 x i64> @llvm.experimental.vector.reverse.nxv4i64(<vscale x 4 x i64>)
+declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float>)
+declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x double> @llvm.experimental.vector.reverse.nxv4f64(<vscale x 4 x double>)
+declare <vscale x 8 x bfloat> @llvm.experimental.vector.reverse.nxv8bf16(<vscale x 8 x bfloat>)
+declare <vscale x 16 x bfloat> @llvm.experimental.vector.reverse.nxv16bf16(<vscale x 16 x bfloat>)
+
+attributes #0 = { "target-features"="+sve,+bf16" }

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
similarity index 100%
rename from llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll
rename to llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll