[llvm] b481512 - [SVE] Move reg+reg gather/scatter addressing optimisations from lowering into DAG combine.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 29 09:43:54 PDT 2022
Author: Paul Walker
Date: 2022-04-29T17:42:33+01:00
New Revision: b481512485a87a5510bf28f63cc512ad26c075a8
URL: https://github.com/llvm/llvm-project/commit/b481512485a87a5510bf28f63cc512ad26c075a8
DIFF: https://github.com/llvm/llvm-project/commit/b481512485a87a5510bf28f63cc512ad26c075a8.diff
LOG: [SVE] Move reg+reg gather/scatter addressing optimisations from lowering into DAG combine.
This is essentially a refactoring patch, but it allows more cases to
be caught, hence the output changes in some tests.
Differential Revision: https://reviews.llvm.org/D122994
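
As a rough illustration of the pattern being moved (a hand-reduced sketch, not
taken from the patch itself; the function name and scalable types below are
chosen for the example), consider a gather whose per-lane addresses are a
vector of bases plus a splat of a scalar offset. After this change the DAG
combine folds the splat back into the scalar base operand, so instruction
selection can pick the reg+reg form (e.g. ld1w { z0.d }, p0/z, [x0, z1.d])
instead of materialising the splat and adding it to the index vector:

; Reduced example: per-lane addresses = vector of i8* bases + splat(%off).
define <vscale x 2 x float> @gather_vec_plus_reg(<vscale x 2 x i8*> %bases, i64 %off, <vscale x 2 x i1> %pg) {
  %off.ins = insertelement <vscale x 2 x i64> undef, i64 %off, i64 0
  %off.splat = shufflevector <vscale x 2 x i64> %off.ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ; The splatted byte offset previously survived into codegen as a
  ; "mov z_.d, x_" plus a vector add; it can now become the scalar base.
  %byte_ptrs = getelementptr i8, <vscale x 2 x i8*> %bases, <vscale x 2 x i64> %off.splat
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %pg, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)

The vector + immediate case follows the same shape with a splatted constant
offset folding into the [z-reg, #imm] form, as the updated
masked_gather_vec_plus_imm and masked_scatter_vec_plus_imm tests below show.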
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 89bbced4c738..f178db8d71b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4651,63 +4651,29 @@ bool getGatherScatterIndexIsExtended(SDValue Index) {
return false;
}
-// If the base pointer of a masked gather or scatter is null, we
-// may be able to swap BasePtr & Index and use the vector + register
-// or vector + immediate addressing mode, e.g.
-// VECTOR + REGISTER:
-// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
-// -> getelementptr %offset, <vscale x N x T> %indices
+// If the base pointer of a masked gather or scatter is constant, we
+// may be able to swap BasePtr & Index and use the vector + immediate addressing
+// mode, e.g.
// VECTOR + IMMEDIATE:
// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
// -> getelementptr #x, <vscale x N x T> %indices
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index,
bool IsScaled, EVT MemVT, unsigned &Opcode,
bool IsGather, SelectionDAG &DAG) {
- if (!isNullConstant(BasePtr) || IsScaled)
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(BasePtr);
+ if (!Offset || IsScaled)
return;
- // FIXME: This will not match for fixed vector type codegen as the nodes in
- // question will have fixed<->scalable conversions around them. This should be
- // moved to a DAG combine or complex pattern so that is executes after all of
- // the fixed vector insert and extracts have been removed. This deficiency
- // will result in a sub-optimal addressing mode being used, i.e. an ADD not
- // being folded into the scatter/gather.
- ConstantSDNode *Offset = nullptr;
- if (Index.getOpcode() == ISD::ADD)
- if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
- if (isa<ConstantSDNode>(SplatVal))
- Offset = cast<ConstantSDNode>(SplatVal);
- else {
- BasePtr = SplatVal;
- Index = Index->getOperand(0);
- return;
- }
- }
-
- unsigned NewOp =
- IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
-
- if (!Offset) {
- std::swap(BasePtr, Index);
- Opcode = NewOp;
- return;
- }
-
uint64_t OffsetVal = Offset->getZExtValue();
unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
- auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
- if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
- // Index is out of range for the immediate addressing mode
- BasePtr = ConstOffset;
- Index = Index->getOperand(0);
+ if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31)
return;
- }
// Immediate is in range
- Opcode = NewOp;
- BasePtr = Index->getOperand(0);
- Index = ConstOffset;
+ Opcode =
+ IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
+ std::swap(BasePtr, Index);
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
@@ -17136,43 +17102,43 @@ static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
SDValue &BasePtr, SDValue &Index,
SelectionDAG &DAG) {
+ // Try to iteratively fold parts of the index into the base pointer to
+ // simplify the index as much as possible.
+ bool Changed = false;
+ while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
+ Changed = true;
+
// Only consider element types that are pointer sized as smaller types can
// be easily promoted.
EVT IndexVT = Index.getValueType();
if (IndexVT.getVectorElementType() != MVT::i64 || IndexVT == MVT::nxv2i64)
- return false;
-
- // Try to iteratively fold parts of the index into the base pointer to
- // simplify the index as much as possible.
- SDValue NewBasePtr = BasePtr, NewIndex = Index;
- while (foldIndexIntoBase(NewBasePtr, NewIndex, N->getScale(), SDLoc(N), DAG))
- ;
+ return Changed;
// Match:
// Index = step(const)
int64_t Stride = 0;
- if (NewIndex.getOpcode() == ISD::STEP_VECTOR)
- Stride = cast<ConstantSDNode>(NewIndex.getOperand(0))->getSExtValue();
+ if (Index.getOpcode() == ISD::STEP_VECTOR)
+ Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
// Match:
// Index = step(const) << shift(const)
- else if (NewIndex.getOpcode() == ISD::SHL &&
- NewIndex.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
- SDValue RHS = NewIndex.getOperand(1);
+ else if (Index.getOpcode() == ISD::SHL &&
+ Index.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
+ SDValue RHS = Index.getOperand(1);
if (auto *Shift =
dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(RHS))) {
- int64_t Step = (int64_t)NewIndex.getOperand(0).getConstantOperandVal(1);
+ int64_t Step = (int64_t)Index.getOperand(0).getConstantOperandVal(1);
Stride = Step << Shift->getZExtValue();
}
}
// Return early because no supported pattern is found.
if (Stride == 0)
- return false;
+ return Changed;
if (Stride < std::numeric_limits<int32_t>::min() ||
Stride > std::numeric_limits<int32_t>::max())
- return false;
+ return Changed;
const auto &Subtarget =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
@@ -17183,14 +17149,13 @@ static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
LastElementOffset > std::numeric_limits<int32_t>::max())
- return false;
+ return Changed;
EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
// Stride does not scale explicitly by 'Scale', because it happens in
// the gather/scatter addressing mode.
Index = DAG.getNode(ISD::STEP_VECTOR, SDLoc(N), NewIndexVT,
DAG.getTargetConstant(Stride, SDLoc(N), MVT::i32));
- BasePtr = NewBasePtr;
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index 085e022fb8ef..0922b4bd0eab 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -1155,7 +1155,6 @@ define void @masked_gather_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %b
ret void
}
-; FIXME: This case does not yet codegen well due to deficiencies in opcode selection
define void @masked_gather_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %off) #0 {
; VBITS_GE_2048-LABEL: masked_gather_vec_plus_reg:
; VBITS_GE_2048: // %bb.0:
@@ -1163,11 +1162,9 @@ define void @masked_gather_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %o
; VBITS_GE_2048-NEXT: ptrue p1.d, vl32
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1]
-; VBITS_GE_2048-NEXT: mov z2.d, x2
; VBITS_GE_2048-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; VBITS_GE_2048-NEXT: add z0.d, z1.d, z2.d
; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b
-; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d]
+; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d]
; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
@@ -1181,7 +1178,6 @@ define void @masked_gather_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %o
ret void
}
-; FIXME: This case does not yet codegen well due to deficiencies in opcode selection
define void @masked_gather_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 {
; VBITS_GE_2048-LABEL: masked_gather_vec_plus_imm:
; VBITS_GE_2048: // %bb.0:
@@ -1190,9 +1186,8 @@ define void @masked_gather_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 {
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1]
; VBITS_GE_2048-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; VBITS_GE_2048-NEXT: add z1.d, z1.d, #4
; VBITS_GE_2048-NEXT: punpklo p1.h, p1.b
-; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d]
+; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d, #4]
; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_2048-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
index 4e6175aa9ef5..79d27b4ca758 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -1051,7 +1051,6 @@ define void @masked_scatter_64b_unscaled(<32 x float>* %a, <32 x i64>* %b, i8* %
ret void
}
-; FIXME: This case does not yet codegen well due to deficiencies in opcode selection
define void @masked_scatter_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %off) #0 {
; VBITS_GE_2048-LABEL: masked_scatter_vec_plus_reg:
; VBITS_GE_2048: // %bb.0:
@@ -1059,12 +1058,10 @@ define void @masked_scatter_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %
; VBITS_GE_2048-NEXT: ptrue p1.d, vl32
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1]
-; VBITS_GE_2048-NEXT: mov z2.d, x2
; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; VBITS_GE_2048-NEXT: add z1.d, z1.d, z2.d
; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d]
+; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [x2, z1.d]
; VBITS_GE_2048-NEXT: ret
%vals = load <32 x float>, <32 x float>* %a
%bases = load <32 x i8*>, <32 x i8*>* %b
@@ -1075,7 +1072,6 @@ define void @masked_scatter_vec_plus_reg(<32 x float>* %a, <32 x i8*>* %b, i64 %
ret void
}
-; FIXME: This case does not yet codegen well due to deficiencies in opcode selection
define void @masked_scatter_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 {
; VBITS_GE_2048-LABEL: masked_scatter_vec_plus_imm:
; VBITS_GE_2048: // %bb.0:
@@ -1084,10 +1080,9 @@ define void @masked_scatter_vec_plus_imm(<32 x float>* %a, <32 x i8*>* %b) #0 {
; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1]
; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; VBITS_GE_2048-NEXT: add z1.d, z1.d, #4
; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_2048-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d]
+; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d, #4]
; VBITS_GE_2048-NEXT: ret
%vals = load <32 x float>, <32 x float>* %a
%bases = load <32 x i8*>, <32 x i8*>* %b
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
index 82beb94dfcac..9257a6a54ba8 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll
@@ -105,20 +105,18 @@ define void @scatter_i8_index_offset_maximum_plus_one(i8* %base, i64 %offset, <v
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov w9, #67108864
; CHECK-NEXT: lsr x8, x8, #4
-; CHECK-NEXT: mov z1.d, x1
+; CHECK-NEXT: add x10, x0, x1
; CHECK-NEXT: punpklo p1.h, p0.b
-; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: mov w9, #33554432
-; CHECK-NEXT: index z2.d, #0, x9
-; CHECK-NEXT: mov z3.d, x8
-; CHECK-NEXT: add z3.d, z2.d, z3.d
-; CHECK-NEXT: add z2.d, z2.d, z1.d
-; CHECK-NEXT: add z1.d, z3.d, z1.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: st1b { z3.d }, p1, [x0, z2.d]
-; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: index z1.d, #0, x9
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: st1b { z3.d }, p1, [x10, z1.d]
+; CHECK-NEXT: add z2.d, z1.d, z2.d
+; CHECK-NEXT: st1b { z0.d }, p0, [x10, z2.d]
; CHECK-NEXT: ret
%t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
%t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -140,20 +138,18 @@ define void @scatter_i8_index_offset_minimum_minus_one(i8* %base, i64 %offset, <
; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: lsr x8, x8, #4
; CHECK-NEXT: movk x9, #64511, lsl #16
-; CHECK-NEXT: mov z1.d, x1
+; CHECK-NEXT: add x10, x0, x1
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: mov x9, #-33554433
-; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: index z2.d, #0, x9
-; CHECK-NEXT: mov z3.d, x8
-; CHECK-NEXT: add z3.d, z2.d, z3.d
-; CHECK-NEXT: add z2.d, z2.d, z1.d
-; CHECK-NEXT: add z1.d, z3.d, z1.d
; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: st1b { z3.d }, p1, [x0, z2.d]
-; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: index z1.d, #0, x9
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: st1b { z3.d }, p1, [x10, z1.d]
+; CHECK-NEXT: add z2.d, z1.d, z2.d
+; CHECK-NEXT: st1b { z0.d }, p0, [x10, z2.d]
; CHECK-NEXT: ret
%t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
%t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -174,20 +170,18 @@ define void @scatter_i8_index_stride_too_big(i8* %base, i64 %offset, <vscale x 4
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: mov x9, #-9223372036854775808
; CHECK-NEXT: lsr x8, x8, #4
-; CHECK-NEXT: mov z1.d, x1
+; CHECK-NEXT: add x10, x0, x1
; CHECK-NEXT: punpklo p1.h, p0.b
-; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: uunpklo z3.d, z0.s
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: mov x9, #4611686018427387904
-; CHECK-NEXT: index z2.d, #0, x9
-; CHECK-NEXT: mov z3.d, x8
-; CHECK-NEXT: add z3.d, z2.d, z3.d
-; CHECK-NEXT: add z2.d, z2.d, z1.d
-; CHECK-NEXT: add z1.d, z3.d, z1.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
+; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: st1b { z3.d }, p1, [x0, z2.d]
-; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
+; CHECK-NEXT: index z1.d, #0, x9
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: st1b { z3.d }, p1, [x10, z1.d]
+; CHECK-NEXT: add z2.d, z1.d, z2.d
+; CHECK-NEXT: st1b { z0.d }, p0, [x10, z2.d]
; CHECK-NEXT: ret
%t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
%t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -346,9 +340,7 @@ define <vscale x 2 x i64> @masked_gather_nxv2i64_const_with_vec_offsets(<vscale
define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with_vec_plus_scalar_offsets(<vscale x 2 x i64> %vector_offsets, i64 %scalar_offset, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: masked_gather_nxv2i64_null_with_vec_plus_scalar_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov z1.d, x0
-; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: lsl x8, x0, #3
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d, lsl #3]
; CHECK-NEXT: ret
%scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 %scalar_offset, i64 0
@@ -362,8 +354,7 @@ define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with_vec_plus_scalar_offse
define <vscale x 2 x i64> @masked_gather_nxv2i64_null_with__vec_plus_imm_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: masked_gather_nxv2i64_null_with__vec_plus_imm_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: mov w8, #8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d, lsl #3]
; CHECK-NEXT: ret
%scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 1, i64 0
@@ -427,9 +418,7 @@ define void @masked_scatter_nxv2i64_const_with_vec_offsets(<vscale x 2 x i64> %v
define void @masked_scatter_nxv2i64_null_with_vec_plus_scalar_offsets(<vscale x 2 x i64> %vector_offsets, i64 %scalar_offset, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %data) #0 {
; CHECK-LABEL: masked_scatter_nxv2i64_null_with_vec_plus_scalar_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: add z0.d, z0.d, z2.d
+; CHECK-NEXT: lsl x8, x0, #3
; CHECK-NEXT: st1d { z1.d }, p0, [x8, z0.d, lsl #3]
; CHECK-NEXT: ret
%scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 %scalar_offset, i64 0
@@ -443,8 +432,7 @@ define void @masked_scatter_nxv2i64_null_with_vec_plus_scalar_offsets(<vscale x
define void @masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets(<vscale x 2 x i64> %vector_offsets, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %data) #0 {
; CHECK-LABEL: masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT: mov w8, #8
; CHECK-NEXT: st1d { z1.d }, p0, [x8, z0.d, lsl #3]
; CHECK-NEXT: ret
%scalar_offset.ins = insertelement <vscale x 2 x i64> undef, i64 1, i64 0