[llvm-branch-commits] [llvm] 06b159a - [RISCV] Add tests for rv32 gather/scatter costs. NFC

Wed Jan 28 01:23:23 PST 2026

Author: Luke Lau
Date: 2026-01-28T09:23:14Z
New Revision: 06b159af459b76dfd5f50c97fb067b5ea67f6f70

URL: https://github.com/llvm/llvm-project/commit/06b159af459b76dfd5f50c97fb067b5ea67f6f70
DIFF: https://github.com/llvm/llvm-project/commit/06b159af459b76dfd5f50c97fb067b5ea67f6f70.diff

LOG: [RISCV] Add tests for rv32 gather/scatter costs. NFC

The rv32 costs currently diverge from the rv64 costs, which I plan on
fixing in another patch, so this precommits the tests for them.

The zve32f RUN lines were split off into separate files (one for gather,
one for scatter) so the check prefixes are easier to reason about.

The -riscv-v-vector-bits-max RUN lines were also removed to simplify the
check prefixes since I'm not sure if they were intentionally testing any
specific logic.

(cherry picked from commit 3ad6d350c44f54482a86a7eb488732093eaed372)

Added: 
    llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll
    llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll

Modified: 
    llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
    llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll
new file mode 100644
index 0000000000000..20749d07c44fa

--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather-zve32f.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV64
+
+define void @masked_gather() {
+; RV32-LABEL: 'masked_gather'
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'masked_gather'
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+  %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+  %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+  %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+
+  %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+  %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+  %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+  %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+  %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+
+  %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+  %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+  %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+  %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+
+  %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+  %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+  %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+  %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+  %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+
+  %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+  %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+  %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+  %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+  %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+  %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+
+  %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+  %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+  %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+  %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+  %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+  %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
index 997ec12b714f6..e4faaceea38d8 100644
--- a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
@@ -1,86 +1,79 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
 
 define void @masked_gather_aligned() {
-; GENERIC-LABEL: 'masked_gather_aligned'
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RV32-LABEL: 'masked_gather_aligned'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; MAX256-LABEL: 'masked_gather_aligned'
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RV64-LABEL: 'masked_gather_aligned'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x double> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x double> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x double> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <vscale x 16 x float> @llvm.masked.gather.nxv16f32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x float> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x float> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x float> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x float> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x float> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x i64> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x i64> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x i64> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x i64> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <vscale x 16 x i32> @llvm.masked.gather.nxv16i32.nxv16p0(<vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef, <vscale x 16 x i32> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x i32> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x i32> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x i32> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x i32> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <vscale x 32 x i16> @llvm.masked.gather.nxv32i16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x i16> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <vscale x 16 x i16> @llvm.masked.gather.nxv16i16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x i16> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x i16> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I16 = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <vscale x 64 x i8> @llvm.masked.gather.nxv64i8.nxv64p0(<vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef, <vscale x 64 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef, <vscale x 32 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; UNSUPPORTED-LABEL: 'masked_gather_aligned'
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
@@ -114,10 +107,6 @@ define void @masked_gather_aligned() {
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
@@ -157,32 +146,18 @@ define void @masked_gather_aligned() {
   %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
   %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
 
-
-  %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
-  %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
-  %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
-  %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
   ret void
 }
 
 define void @masked_gather_aligned_f16() {
-; GENERIC-LABEL: 'masked_gather_aligned_f16'
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; MAX256-LABEL: 'masked_gather_aligned_f16'
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SUPPORTED-LABEL: 'masked_gather_aligned_f16'
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <vscale x 16 x half> @llvm.masked.gather.nxv16f16.nxv16p0(<vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef, <vscale x 16 x half> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef, <vscale x 8 x half> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef, <vscale x 4 x half> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef, <vscale x 2 x half> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1F16 = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef, <vscale x 1 x half> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; UNSUPPORTED-LABEL: 'masked_gather_aligned_f16'
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V32F16 = call <vscale x 32 x half> @llvm.masked.gather.nxv32f16.nxv32p0(<vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef, <vscale x 32 x half> undef)
@@ -229,10 +204,6 @@ define void @masked_gather_unaligned() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
@@ -264,11 +235,6 @@ define void @masked_gather_unaligned() {
   %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
   %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
 
-  %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
-  %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
-  %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
-  %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
-
   ret void
 }
 
@@ -291,3 +257,54 @@ define void @masked_gather_unaligned_f16() {
 
   ret void
 }
+
+define void @masked_gather_ptr_align4() {
+; RV32-LABEL: 'masked_gather_ptr_align4'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'masked_gather_ptr_align4'
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; UNSUPPORTED-LABEL: 'masked_gather_ptr_align4'
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+  %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+  %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+  %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+  ret void
+}
+
+define void @masked_gather_ptr_align8() {
+; SUPPORTED-LABEL: 'masked_gather_ptr_align8'
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; UNSUPPORTED-LABEL: 'masked_gather_ptr_align8'
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %V8PTR = call <vscale x 8 x ptr> @llvm.masked.gather.nxv8p0.nxv8p0(<vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef, <vscale x 8 x ptr> undef)
+  %V4PTR = call <vscale x 4 x ptr> @llvm.masked.gather.nxv4p0.nxv4p0(<vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef, <vscale x 4 x ptr> undef)
+  %V2PTR = call <vscale x 2 x ptr> @llvm.masked.gather.nxv2p0.nxv2p0(<vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef, <vscale x 2 x ptr> undef)
+  %V1PTR= call <vscale x 1 x ptr> @llvm.masked.gather.nxv1p0.nxv1p0(<vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef, <vscale x 1 x ptr> undef)
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll
new file mode 100644
index 0000000000000..982c227de553a
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter-zve32f.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b < %s | FileCheck %s --check-prefixes=RV64
+
+define void @masked_scatter() {
+; RV32-LABEL: 'masked_scatter'
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'masked_scatter'
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+
+  call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+
+  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+
+  call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
+  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
+
+  call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
+  call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
+  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
+
+  call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
+  call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
+  call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
+  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
+
+  call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
+
+  ret void
+}

diff  --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
index 69abcdea176cc..204fee14b9ce1 100644
--- a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
@@ -1,86 +1,79 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV32
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin < %s | FileCheck %s --check-prefixes=CHECK,SUPPORTED,RV64
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
-; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=riscv64 -mattr=+zve32f,+zvl128b,+f,+d,+zfh,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
 
 define void @masked_scatter_aligned() {
-; GENERIC-LABEL: 'masked_scatter_aligned'
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> undef, i32 4, <vscale x 16 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 4, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 4, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> undef, i32 4, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> undef, i32 4, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> undef, i32 1, <vscale x 64 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> undef, i32 1, <vscale x 32 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RV32-LABEL: 'masked_scatter_aligned'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; MAX256-LABEL: 'masked_scatter_aligned'
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; RV64-LABEL: 'masked_scatter_aligned'
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0(<vscale x 16 x float> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0(<vscale x 16 x i32> undef, <vscale x 16 x ptr> align 4 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0(<vscale x 32 x i16> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0(<vscale x 64 x i8> undef, <vscale x 64 x ptr> align 1 undef, <vscale x 64 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0(<vscale x 32 x i8> undef, <vscale x 32 x ptr> align 1 undef, <vscale x 32 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> undef, <vscale x 16 x ptr> align 1 undef, <vscale x 16 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; UNSUPPORTED-LABEL: 'masked_scatter_aligned'
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
@@ -114,10 +107,6 @@ define void @masked_scatter_aligned() {
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
-; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
 ; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
@@ -157,32 +146,20 @@ define void @masked_scatter_aligned() {
   call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
   call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
 
-  call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 8, <vscale x 8 x i1> undef)
-  call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 8, <vscale x 4 x i1> undef)
-  call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 8, <vscale x 2 x i1> undef)
-  call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 8, <vscale x 1 x i1> undef)
-
   ret void
 }
 
 define void @masked_scatter_aligned_f16() {
-; GENERIC-LABEL: 'masked_scatter_aligned_f16'
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> undef, i32 2, <vscale x 32 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> undef, i32 2, <vscale x 16 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> undef, i32 2, <vscale x 4 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> undef, i32 2, <vscale x 2 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> undef, i32 2, <vscale x 1 x i1> undef)
-; GENERIC-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; MAX256-LABEL: 'masked_scatter_aligned_f16'
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
-; MAX256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SUPPORTED-LABEL: 'masked_scatter_aligned_f16'
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0(<vscale x 16 x half> undef, <vscale x 16 x ptr> align 2 undef, <vscale x 16 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> undef, <vscale x 8 x ptr> align 2 undef, <vscale x 8 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> undef, <vscale x 4 x ptr> align 2 undef, <vscale x 4 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> undef, <vscale x 2 x ptr> align 2 undef, <vscale x 2 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> undef, <vscale x 1 x ptr> align 2 undef, <vscale x 1 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; UNSUPPORTED-LABEL: 'masked_scatter_aligned_f16'
 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0(<vscale x 32 x half> undef, <vscale x 32 x ptr> align 2 undef, <vscale x 32 x i1> undef)
@@ -229,10 +206,6 @@ define void @masked_scatter_unaligned() {
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 1 undef, <vscale x 8 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 1 undef, <vscale x 4 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 1 undef, <vscale x 2 x i1> undef)
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 1 undef, <vscale x 1 x i1> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> undef, <vscale x 8 x ptr> undef, i32 2, <vscale x 8 x i1> undef)
@@ -264,11 +237,6 @@ define void @masked_scatter_unaligned() {
   call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
   call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
 
-  call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
-  call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
-  call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
-  call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> undef, i32 1, <vscale x 1 x i1> undef)
-
   ret void
 }
 
@@ -291,3 +259,56 @@ define void @masked_scatter_unaligned_f16() {
 
   ret void
 }
+
+define void @masked_scatter_ptr_align4() {
+; RV32-LABEL: 'masked_scatter_ptr_align4'
+; RV32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV32-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64-LABEL: 'masked_scatter_ptr_align4'
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; RV64-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; RV64-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; UNSUPPORTED-LABEL: 'masked_scatter_ptr_align4'
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 4 undef, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 4 undef, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 4 undef, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 4 undef, <vscale x 1 x i1> undef)
+
+  ret void
+}
+
+define void @masked_scatter_ptr_align8() {
+; SUPPORTED-LABEL: 'masked_scatter_ptr_align8'
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; SUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; UNSUPPORTED-LABEL: 'masked_scatter_ptr_align8'
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.masked.scatter.nxv8p0.nxv8p0(<vscale x 8 x ptr> undef, <vscale x 8 x ptr> align 8 undef, <vscale x 8 x i1> undef)
+  call void @llvm.masked.scatter.nxv4p0.nxv4p0(<vscale x 4 x ptr> undef, <vscale x 4 x ptr> align 8 undef, <vscale x 4 x i1> undef)
+  call void @llvm.masked.scatter.nxv2p0.nxv2p0(<vscale x 2 x ptr> undef, <vscale x 2 x ptr> align 8 undef, <vscale x 2 x i1> undef)
+  call void @llvm.masked.scatter.nxv1p0.nxv1p0(<vscale x 1 x ptr> undef, <vscale x 1 x ptr> align 8 undef, <vscale x 1 x i1> undef)
+
+  ret void
+}