[llvm] 0aebd1d - [RISCV] Fix crash when costing scalable gather/scatter of pointer
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 20 12:50:49 PDT 2022
Author: Philip Reames
Date: 2022-06-20T12:50:42-07:00
New Revision: 0aebd1d8758603e4e5af39d50e6628f53b583609
URL: https://github.com/llvm/llvm-project/commit/0aebd1d8758603e4e5af39d50e6628f53b583609
DIFF: https://github.com/llvm/llvm-project/commit/0aebd1d8758603e4e5af39d50e6628f53b583609.diff
LOG: [RISCV] Fix crash when costing scalable gather/scatter of pointer
This fixes a bug introduced in d764aa. A pointer type is not a primitive type, so its size was reported as zero bits and we ended up dividing by zero when computing VLMAX.
Differential Revision: https://reviews.llvm.org/D128219
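
For context, a minimal standalone sketch (not part of this patch; the DataLayout string and printed values are illustrative assumptions for a generic 64-bit target) of why the size queries used by the old code report zero bits for a scalable vector of pointers, while the DataLayout-based queries used after this change do not:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Generic 64-bit layout for illustration; riscv64 also uses 64-bit pointers.
  DataLayout DL("e-p:64:64-i64:64-n64-S128");

  // <vscale x 2 x i8*>: a scalable vector whose element type is a pointer.
  Type *PtrTy = PointerType::get(Type::getInt8Ty(Ctx), 0);
  VectorType *VTy = ScalableVectorType::get(PtrTy, 2);

  // A pointer is not a primitive type, so the queries the old code relied on
  // report a size of zero bits.
  outs() << VTy->getScalarSizeInBits() << "\n";                        // 0
  outs() << VTy->getPrimitiveSizeInBits().getKnownMinValue() << "\n";  // 0

  // The DataLayout, by contrast, knows the storage size of a pointer.
  outs() << DL.getTypeSizeInBits(PtrTy).getKnownMinValue() << "\n";    // 64
  outs() << DL.getTypeSizeInBits(VTy).getKnownMinValue() << "\n";      // 128 (known minimum)
  return 0;
}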
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 9ded2681782a..e35e17aa3877 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -239,8 +239,8 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
InstructionCost MemOpCost = getMemoryOpCost(Opcode, VTy.getElementType(),
Alignment, 0, CostKind, I);
if (isa<ScalableVectorType>(VTy)) {
- const unsigned EltSize = VTy.getScalarSizeInBits();
- const unsigned MinSize = VTy.getPrimitiveSizeInBits().getKnownMinValue();
+ const unsigned EltSize = DL.getTypeSizeInBits(VTy.getElementType());
+ const unsigned MinSize = DL.getTypeSizeInBits(&VTy).getKnownMinValue();
const unsigned VectorBitsMax = ST->getRealMaxVLen();
const unsigned MaxVLMAX =
RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
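
The shape of the failure, as a purely illustrative sketch (this is not the body of RISCVTargetLowering::computeVLMAX, and the scaling constant below is an assumption): any VLMAX-style computation that divides the maximum vector length by the element size is undefined once the element size is reported as zero, which is what the old getScalarSizeInBits() query produced for pointer elements.

#include <cassert>

// Illustrative helper only; the real computation lives in
// RISCVTargetLowering::computeVLMAX. The point is the division by EltSize.
static unsigned vlmaxSketch(unsigned VectorBitsMax, unsigned EltSize,
                            unsigned MinSize) {
  const unsigned BitsPerBlock = 64; // assumed RVV register block size
  assert(EltSize != 0 && "a zero element size would divide by zero here");
  return ((VectorBitsMax / EltSize) * MinSize) / BitsPerBlock;
}

int main() {
  // Before the fix, a scalable vector of pointers reached this kind of
  // computation with EltSize == 0 (pointers are not primitive types).
  // After the fix, EltSize comes from DL.getTypeSizeInBits(ElementType),
  // e.g. 64 bits on riscv64, so the division is well defined.
  return vlmaxSketch(/*VectorBitsMax=*/65536, /*EltSize=*/64,
                     /*MinSize=*/128) == 2048 ? 0 : 1;
}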
diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
index 7e733d6810b7..de4ffbe1d5d1 100644
--- a/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-gather.ll
@@ -42,6 +42,10 @@ define void @masked_gather_aligned() {
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; MAX256-LABEL: 'masked_gather_aligned'
@@ -82,6 +86,10 @@ define void @masked_gather_aligned() {
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; UNSUPPORTED-LABEL: 'masked_gather_aligned'
@@ -122,6 +130,10 @@ define void @masked_gather_aligned() {
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V8F64 = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
@@ -167,6 +179,12 @@ define void @masked_gather_aligned() {
%V4I8 = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8> undef)
%V2I8 = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8> undef)
%V1I8 = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8> undef)
+
+
+ %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
+ %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
+ %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
+ %V1PTR= call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
ret void
}
@@ -202,6 +220,10 @@ define void @masked_gather_unaligned() {
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4I16.u = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i16> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1PTR = call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%V8F64.u = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef, <vscale x 8 x double> undef)
@@ -240,6 +262,11 @@ define void @masked_gather_unaligned() {
%V2I16.u = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i16> undef)
%V1I16.u = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i16> undef)
+ %V8PTR = call <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef, <vscale x 8 x i8*> undef)
+ %V4PTR = call <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef, <vscale x 4 x i8*> undef)
+ %V2PTR = call <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef, <vscale x 2 x i8*> undef)
+ %V1PTR= call <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef, <vscale x 1 x i8*> undef)
+
ret void
}
@@ -286,3 +313,8 @@ declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*>
declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
+
+declare <vscale x 8 x i8*> @llvm.masked.gather.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8**>, i32, <vscale x 8 x i1>, <vscale x 8 x i8*>)
+declare <vscale x 4 x i8*> @llvm.masked.gather.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8**>, i32, <vscale x 4 x i1>, <vscale x 4 x i8*>)
+declare <vscale x 2 x i8*> @llvm.masked.gather.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8**>, i32, <vscale x 2 x i1>, <vscale x 2 x i8*>)
+declare <vscale x 1 x i8*> @llvm.masked.gather.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8**>, i32, <vscale x 1 x i1>, <vscale x 1 x i8*>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
index eb3f02f649e4..fbf15a556241 100644
--- a/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll
@@ -42,6 +42,10 @@ define void @masked_scatter_aligned() {
; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
+; GENERIC-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
; GENERIC-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; MAX256-LABEL: 'masked_scatter_aligned'
@@ -82,6 +86,10 @@ define void @masked_scatter_aligned() {
; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
+; MAX256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
; MAX256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; UNSUPPORTED-LABEL: 'masked_scatter_aligned'
@@ -122,6 +130,10 @@ define void @masked_scatter_aligned() {
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
+; UNSUPPORTED-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
; UNSUPPORTED-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
@@ -168,6 +180,11 @@ define void @masked_scatter_aligned() {
call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
+
ret void
}
@@ -203,6 +220,10 @@ define void @masked_scatter_unaligned() {
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef)
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef)
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef)
@@ -241,6 +262,11 @@ define void @masked_scatter_unaligned() {
call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef)
call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef)
+ call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef)
+ call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef)
+ call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef)
+ call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef)
+
ret void
}
@@ -287,3 +313,9 @@ declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8
declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)
+
+declare void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*>, <vscale x 8 x i8**>, i32, <vscale x 8 x i1>)
+declare void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*>, <vscale x 4 x i8**>, i32, <vscale x 4 x i1>)
+declare void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8**>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*>, <vscale x 1 x i8**>, i32, <vscale x 1 x i1>)
+