[llvm-branch-commits] [llvm] 52e4084 - [SVE][CodeGen] Vector + immediate addressing mode for masked gather/scatter
Kerry McLaughlin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 18 04:01:41 PST 2020
Author: Kerry McLaughlin
Date: 2020-12-18T11:56:36Z
New Revision: 52e4084d9c3b15dbb73906f28f7f5aa45b835b64
URL: https://github.com/llvm/llvm-project/commit/52e4084d9c3b15dbb73906f28f7f5aa45b835b64
DIFF: https://github.com/llvm/llvm-project/commit/52e4084d9c3b15dbb73906f28f7f5aa45b835b64.diff
LOG: [SVE][CodeGen] Vector + immediate addressing mode for masked gather/scatter
This patch extends LowerMGATHER/MSCATTER to make use of the vector + reg/immediate
addressing modes for scalable masked gathers & scatters.
selectGatherScatterAddrMode checks if the base pointer is null, in which case
we can swap the base pointer and the index, e.g.
getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
-> getelementptr %offset, <vscale x N x T> %indices
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D93132
Added:
llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll
llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll
llvm/test/CodeGen/AArch64/sve-masked-gather.ll
llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll
llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll
llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9eeacc8df0bf..43db745d6328 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3812,6 +3812,8 @@ unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
+ case AArch64ISD::GLD1_IMM_MERGE_ZERO:
+ return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
@@ -3843,6 +3845,60 @@ bool getGatherScatterIndexIsExtended(SDValue Index) {
return false;
}
+// If the base pointer of a masked gather or scatter is null, we
+// may be able to swap BasePtr & Index and use the vector + register
+// or vector + immediate addressing mode, e.g.
+// VECTOR + REGISTER:
+// getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
+// -> getelementptr %offset, <vscale x N x T> %indices
+// VECTOR + IMMEDIATE:
+// getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
+// -> getelementptr #x, <vscale x N x T> %indices
+// BasePtr, Index and Opcode are rewritten in place; if no rewrite
+// applies (non-null base pointer) all three are left untouched.
+void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
+ unsigned &Opcode, bool IsGather,
+ SelectionDAG &DAG) {
+ // Only a null base pointer can be folded into the index.
+ if (!isNullConstant(BasePtr))
+ return;
+
+ // Look for Index = <indices> + splat(SplatVal). A constant splat is a
+ // candidate for the vector + immediate form; any other splatted scalar
+ // immediately becomes the new base for the vector + register form.
+ ConstantSDNode *Offset = nullptr;
+ if (Index.getOpcode() == ISD::ADD)
+ if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
+ if (isa<ConstantSDNode>(SplatVal))
+ Offset = cast<ConstantSDNode>(SplatVal);
+ else {
+ BasePtr = SplatVal;
+ Index = Index->getOperand(0);
+ return;
+ }
+ }
+
+ unsigned NewOp =
+ IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+
+ // No splatted offset at all: keep the null base as an immediate of zero
+ // by swapping it with the index and switching to the _IMM node.
+ if (!Offset) {
+ std::swap(BasePtr, Index);
+ Opcode = NewOp;
+ return;
+ }
+
+ uint64_t OffsetVal = Offset->getZExtValue();
+ unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
+ auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
+
+ // The immediate form requires a multiple of the memory element size,
+ // with the scaled value in [0, 31]; otherwise fall back to the
+ // vector + register form with the constant as the scalar base.
+ if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
+ // Index is out of range for the immediate addressing mode
+ BasePtr = ConstOffset;
+ Index = Index->getOperand(0);
+ return;
+ }
+
+ // Immediate is in range
+ Opcode = NewOp;
+ BasePtr = Index->getOperand(0);
+ Index = ConstOffset;
+ return;
+}
+
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -3892,6 +3948,9 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
Index = Index.getOperand(0);
unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
+ selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+ /*isGather=*/true, DAG);
+
if (ResNeedsSignExtend)
Opcode = getSignExtendedGatherOpcode(Opcode);
@@ -3944,9 +4003,12 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
+ unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
+ selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
+ /*isGather=*/false, DAG);
+
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
- return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
- VTs, Ops);
+ return DAG.getNode(Opcode, DL, VTs, Ops);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
index 4482730a7d74..6b1dc031dbb2 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -44,12 +44,29 @@ define <vscale x 2 x i64> @masked_sgather_zext(i8* %base, <vscale x 2 x i64> %of
; Tests that exercise various type legalisation scenarios for ISD::MGATHER.
+; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i8> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: ld1sb { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+ %data = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ ret <vscale x 2 x i8> %data
+}
+
+; Code generate load of an illegal datatype via promotion.
+define <vscale x 2 x i16> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: ld1sh { z0.d }, p0/z, [z0.d]
+; CHECK: ret
+ %data = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ ret <vscale x 2 x i16> %data
+}
+
; Code generate load of an illegal datatype via promotion.
define <vscale x 2 x i32> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
-; CHECK-DAG: mov x8, xzr
-; CHECK-DAG: ld1sw { z0.d }, p0/z, [x8, z0.d]
-; CHECK: ret
+; CHECK: ld1sw { z0.d }, p0/z, [z0.d]
+; CHECK: ret
%data = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
ret <vscale x 2 x i32> %data
}
@@ -92,11 +109,10 @@ define <vscale x 32 x i32> @masked_gather_nxv32i32(i32* %base, <vscale x 32 x i3
define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv4i8:
; CHECK: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: zip2 p2.s, p0.s, p1.s
; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
-; CHECK-NEXT: ld1sb { z1.d }, p2/z, [x8, z1.d]
-; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ld1sb { z1.d }, p2/z, [z1.d]
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
@@ -109,8 +125,6 @@ define <vscale x 4 x i32> @masked_sgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vsca
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-
declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
-
declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 32 x i32> @llvm.masked.gather.nxv32i32(<vscale x 32 x i32*>, i32, <vscale x 32 x i1>, <vscale x 32 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll
new file mode 100644
index 000000000000..d2f595ebef76
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-imm.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d, #1]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #2]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
+ %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
+ %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+ %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #8]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
+ %vals.zext = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
+ %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+ ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d, #4]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
+ %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+ ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #12]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
+ %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+ ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d, #32]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
+ %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+ ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d, #5]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 5
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d, #12]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 6
+ %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d, #28]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 7
+ %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+ %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+; Tests where the immediate is out of range
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8_range(<vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16_range(<vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 32
+ %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+ ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16_range(<vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 32
+ %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+ ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32_range(<vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #128
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 32
+ %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+ ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64_range(<vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #256
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
+ %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+ ret <vscale x 2 x double> %vals
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll
new file mode 100644
index 000000000000..212606ca24ae
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-vec-plus-reg.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
+ %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
+ %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+ %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
+ %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %vals
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
+ %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+ ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
+ %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+ ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
+ %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+ ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
+ %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+ ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
+ %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
+ %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+ %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
new file mode 100644
index 000000000000..f9a476bbb1c1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+ %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.zext
+}
+
+define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+ ret <vscale x 2 x i64> %vals
+}
+
+define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
+ ret <vscale x 2 x half> %vals
+}
+
+define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_gather_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+ ret <vscale x 2 x bfloat> %vals
+}
+
+define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+ ret <vscale x 2 x float> %vals
+}
+
+define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_gather_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
+ ret <vscale x 2 x double> %vals
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sb { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
+ %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sh { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
+ %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_sgather_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [z0.d]
+; CHECK-NEXT: ret
+ %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
+ %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %vals.sext
+}
+
+declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
+declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
+declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll
new file mode 100644
index 000000000000..cc33f77d7d88
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-imm.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1b { z0.d }, p0, [z1.d, #1]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 1
+ call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #2]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 1
+ call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #4]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 1
+ call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z0.d }, p0, [z1.d, #8]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i64, <vscale x 2 x i64*> %bases, i32 1
+ call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #4]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr half, <vscale x 2 x half*> %bases, i32 2
+ call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %bases, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_scatter_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d, #4]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr bfloat, <vscale x 2 x bfloat*> %bases, i32 2
+ call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.d }, p0, [z1.d, #12]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr float, <vscale x 2 x float*> %bases, i32 3
+ call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z0.d }, p0, [z1.d, #32]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 4
+ call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+; Test where the immediate is out of range
+
+define void @masked_scatter_nxv2i8_range(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i8_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #32
+; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i32 32
+ call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i16_range(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i16_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #64
+; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i16, <vscale x 2 x i16*> %bases, i32 32
+ call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i32_range(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i32_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #128
+; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i32, <vscale x 2 x i32*> %bases, i32 32
+ call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f64_range(<vscale x 2 x double> %data, <vscale x 2 x double*> %bases, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f64_range:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #256
+; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr double, <vscale x 2 x double*> %bases, i32 32
+ call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll
new file mode 100644
index 000000000000..4164158c36cb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter-vec-plus-reg.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
+ call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
+ call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
+ call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
+ call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: masked_scatter_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
+ call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 2, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
+ call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i8*> %bases, i64 %offset, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: masked_scatter_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: ret
+ %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, i64 %offset
+ %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>
+ call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %mask)
+ ret void
+}
+
+declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
new file mode 100644
index 000000000000..002fb6ae5a67
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define void @masked_scatter_nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, <vscale x 2 x i1> %masks) nounwind #0 {
+; CHECK-LABEL: masked_scatter_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x bfloat*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+define void @masked_scatter_nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %masks) nounwind {
+; CHECK-LABEL: masked_scatter_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double*> %ptrs, i32 0, <vscale x 2 x i1> %masks)
+ ret void
+}
+
+declare void @llvm.masked.scatter.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
+declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
+attributes #0 = { "target-features"="+sve,+bf16" }
More information about the llvm-branch-commits
mailing list