[llvm] 306c8ab - [SVE][CodeGen] Improve codegen of scalable masked scatters
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 13 03:26:06 PST 2020
Author: Kerry McLaughlin
Date: 2020-11-13T11:19:36Z
New Revision: 306c8ab20841aceef449dd27e1d0f46c6b85be44
URL: https://github.com/llvm/llvm-project/commit/306c8ab20841aceef449dd27e1d0f46c6b85be44
DIFF: https://github.com/llvm/llvm-project/commit/306c8ab20841aceef449dd27e1d0f46c6b85be44.diff
LOG: [SVE][CodeGen] Improve codegen of scalable masked scatters
If the scatter store is able to perform the sign/zero extend of
its index itself, refineIndexType() folds the extend into the instruction.
Additionally, refineUniformBase() recovers the base pointer and index
from an add + splat_vector.
Reviewed By: sdesmalen
Differential Revision: https://reviews.llvm.org/D90942
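As a quick illustration, here is a minimal IR sketch mirroring the sve-masked-scatter-32b-scaled.ll tests updated below (the intrinsic mangling is assumed rather than copied from the test file). A scatter whose 32-bit indices are sign-extended now keeps the narrow index and selects to a single st1h with an sxtw-extended offset, instead of unpacking the data, index and predicate into two 64-bit scatters:

define void @example_scatter_sext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) {
  ; 32-bit indices are sign-extended to 64 bits before forming the pointers
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
  ; previously lowered via uunpklo/uunpkhi and two st1h { z.d } scatters;
  ; with this patch: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %data, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %masks)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)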
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 1734c36bda6b..30d1623d80c2 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1318,6 +1318,10 @@ class TargetLoweringBase {
getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
}
+ // Returns true if VT is a legal index type for masked gathers/scatters
+ // on this target
+ virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; }
+
/// Return how the condition code should be treated: either it is legal, needs
/// to be expanded to some other code sequence, or the target has a custom
/// expander for it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a247141aa252..d18d37eea41a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9399,16 +9399,74 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+ if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+ return false;
+
+ // For now we check only the LHS of the add.
+ SDValue LHS = Index.getOperand(0);
+ SDValue SplatVal = DAG.getSplatValue(LHS);
+ if (!SplatVal)
+ return false;
+
+ BasePtr = SplatVal;
+ Index = Index.getOperand(1);
+ return true;
+}
+
+// Fold sext/zext of index into index type.
+bool refineIndexType(MaskedScatterSDNode *MSC, SDValue &Index, bool Scaled,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op = Index.getOperand(0);
+
+ if (Index.getOpcode() == ISD::ZERO_EXTEND) {
+ MSC->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+ MSC->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
+ if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+ Index = Op;
+ return true;
+ }
+ }
+
+ return false;
+}
+
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
SDValue Chain = MSC->getChain();
+ SDValue Index = MSC->getIndex();
+ SDValue Scale = MSC->getScale();
+ SDValue StoreVal = MSC->getValue();
+ SDValue BasePtr = MSC->getBasePtr();
SDLoc DL(N);
// Zap scatters with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ if (refineUniformBase(BasePtr, Index, DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
+ if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+ SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+ return DAG.getMaskedScatter(
+ DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+ MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+ }
+
return SDValue();
}
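To make the refineUniformBase() case concrete, here is a minimal sketch under the same assumptions, mirroring the 64-bit unscaled tests updated below. A getelementptr over a scalar base reaches the DAG as a scatter with a null base pointer and an index of the form add(splat_vector(%base), %offsets); the combine above moves the splatted value back into the base operand, so this now selects to st1b { z0.d }, p0, [x0, z1.d] instead of materialising the splat and adding it to the offsets:

define void @example_uniform_base(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) {
  ; scalar base plus a vector of byte offsets
  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %data, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %masks)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)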
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f2613a8b1b19..53be7a7edf7a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3705,6 +3705,14 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
+ if (VT.getVectorElementType() == MVT::i32 &&
+ VT.getVectorElementCount().getKnownMinValue() >= 4)
+ return true;
+
+ return false;
+}
+
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector();
}
@@ -3792,11 +3800,8 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
- if (getScatterIndexIsExtended(Index)) {
- if (Index.getOpcode() == ISD::AND)
- IsSigned = false;
+ if (getScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
- }
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 47248b948203..5c5b9c885809 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -980,6 +980,7 @@ class AArch64TargetLowering : public TargetLowering {
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
index 771f3fe407e6..459fd9ab96b8 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
@@ -166,15 +166,7 @@ define void @masked_scatter_nxv2f64_zext(<vscale x 2 x double> %data, double* %b
define void @masked_scatter_nxv4i16_sext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_sext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
@@ -185,15 +177,7 @@ define void @masked_scatter_nxv4i16_sext(<vscale x 4 x i16> %data, i16* %base, <
define void @masked_scatter_nxv4i32_sext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_sext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
-; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
@@ -204,15 +188,7 @@ define void @masked_scatter_nxv4i32_sext(<vscale x 4 x i32> %data, i32* %base, <
define void @masked_scatter_nxv4f16_sext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_sext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
@@ -223,15 +199,7 @@ define void @masked_scatter_nxv4f16_sext(<vscale x 4 x half> %data, half* %base,
define void @masked_scatter_nxv4bf16_sext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_sext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
@@ -242,15 +210,7 @@ define void @masked_scatter_nxv4bf16_sext(<vscale x 4 x bfloat> %data, bfloat* %
define void @masked_scatter_nxv4f32_sext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_sext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
-; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
; CHECK-NEXT: ret
%ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
@@ -261,15 +221,7 @@ define void @masked_scatter_nxv4f32_sext(<vscale x 4 x float> %data, float* %bas
define void @masked_scatter_nxv4i16_zext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_zext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
@@ -280,15 +232,7 @@ define void @masked_scatter_nxv4i16_zext(<vscale x 4 x i16> %data, i16* %base, <
define void @masked_scatter_nxv4i32_zext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_zext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
-; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
@@ -299,15 +243,7 @@ define void @masked_scatter_nxv4i32_zext(<vscale x 4 x i32> %data, i32* %base, <
define void @masked_scatter_nxv4f16_zext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_zext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
@@ -318,15 +254,7 @@ define void @masked_scatter_nxv4f16_zext(<vscale x 4 x half> %data, half* %base,
define void @masked_scatter_nxv4bf16_zext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_zext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
-; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
@@ -337,15 +265,7 @@ define void @masked_scatter_nxv4bf16_zext(<vscale x 4 x bfloat> %data, bfloat* %
define void @masked_scatter_nxv4f32_zext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_zext:
; CHECK: // %bb.0:
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: uunpkhi z2.d, z1.s
-; CHECK-NEXT: uunpklo z1.d, z1.s
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
-; CHECK-NEXT: st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
; CHECK-NEXT: ret
%ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
%ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
index d648e699be82..ff9dd40416bf 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
@@ -8,12 +8,7 @@
define void @masked_scatter_nxv2i8_sext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -25,12 +20,7 @@ define void @masked_scatter_nxv2i8_sext_offsets(<vscale x 2 x i8> %data, i8* %ba
define void @masked_scatter_nxv2i16_sext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -42,12 +32,7 @@ define void @masked_scatter_nxv2i16_sext_offsets(<vscale x 2 x i16> %data, i8* %
define void @masked_scatter_nxv2i32_sext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -59,12 +44,7 @@ define void @masked_scatter_nxv2i32_sext_offsets(<vscale x 2 x i32> %data, i8* %
define void @masked_scatter_nxv2i64_sext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -76,12 +56,7 @@ define void @masked_scatter_nxv2i64_sext_offsets(<vscale x 2 x i64> %data, i8* %
define void @masked_scatter_nxv2f16_sext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -93,12 +68,7 @@ define void @masked_scatter_nxv2f16_sext_offsets(<vscale x 2 x half> %data, i8*
define void @masked_scatter_nxv2bf16_sext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -110,12 +80,7 @@ define void @masked_scatter_nxv2bf16_sext_offsets(<vscale x 2 x bfloat> %data, i
define void @masked_scatter_nxv2f32_sext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -127,12 +92,7 @@ define void @masked_scatter_nxv2f32_sext_offsets(<vscale x 2 x float> %data, i8*
define void @masked_scatter_nxv2f64_sext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -144,11 +104,7 @@ define void @masked_scatter_nxv2f64_sext_offsets(<vscale x 2 x double> %data, i8
define void @masked_scatter_nxv2i8_zext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -160,11 +116,7 @@ define void @masked_scatter_nxv2i8_zext_offsets(<vscale x 2 x i8> %data, i8* %ba
define void @masked_scatter_nxv2i16_zext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -176,11 +128,7 @@ define void @masked_scatter_nxv2i16_zext_offsets(<vscale x 2 x i16> %data, i8* %
define void @masked_scatter_nxv2i32_zext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -192,11 +140,7 @@ define void @masked_scatter_nxv2i32_zext_offsets(<vscale x 2 x i32> %data, i8* %
define void @masked_scatter_nxv2i64_zext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -208,11 +152,7 @@ define void @masked_scatter_nxv2i64_zext_offsets(<vscale x 2 x i64> %data, i8* %
define void @masked_scatter_nxv2f16_zext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -224,11 +164,7 @@ define void @masked_scatter_nxv2f16_zext_offsets(<vscale x 2 x half> %data, i8*
define void @masked_scatter_nxv2bf16_zext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -240,11 +176,7 @@ define void @masked_scatter_nxv2bf16_zext_offsets(<vscale x 2 x bfloat> %data, i
define void @masked_scatter_nxv2f32_zext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f32_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -256,11 +188,7 @@ define void @masked_scatter_nxv2f32_zext_offsets(<vscale x 2 x float> %data, i8*
define void @masked_scatter_nxv2f64_zext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f64_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -275,19 +203,7 @@ define void @masked_scatter_nxv2f64_zext_offsets(<vscale x 2 x double> %data, i8
define void @masked_scatter_nxv4i8_sext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i8_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1b { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1b { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -299,19 +215,7 @@ define void @masked_scatter_nxv4i8_sext_offsets(<vscale x 4 x i8> %data, i8* %ba
define void @masked_scatter_nxv4i16_sext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -323,19 +227,7 @@ define void @masked_scatter_nxv4i16_sext_offsets(<vscale x 4 x i16> %data, i8* %
define void @masked_scatter_nxv4i32_sext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -347,19 +239,7 @@ define void @masked_scatter_nxv4i32_sext_offsets(<vscale x 4 x i32> %data, i8* %
define void @masked_scatter_nxv4f16_sext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -371,19 +251,7 @@ define void @masked_scatter_nxv4f16_sext_offsets(<vscale x 4 x half> %data, i8*
define void @masked_scatter_nxv4bf16_sext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -395,19 +263,7 @@ define void @masked_scatter_nxv4bf16_sext_offsets(<vscale x 4 x bfloat> %data, i
define void @masked_scatter_nxv4f32_sext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_sext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: sunpklo z3.d, z1.s
-; CHECK-NEXT: sunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw]
; CHECK-NEXT: ret
%offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -419,19 +275,7 @@ define void @masked_scatter_nxv4f32_sext_offsets(<vscale x 4 x float> %data, i8*
define void @masked_scatter_nxv4i8_zext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i8_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1b { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1b { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -443,19 +287,7 @@ define void @masked_scatter_nxv4i8_zext_offsets(<vscale x 4 x i8> %data, i8* %ba
define void @masked_scatter_nxv4i16_zext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i16_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -467,19 +299,7 @@ define void @masked_scatter_nxv4i16_zext_offsets(<vscale x 4 x i16> %data, i8* %
define void @masked_scatter_nxv4i32_zext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4i32_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -491,19 +311,7 @@ define void @masked_scatter_nxv4i32_zext_offsets(<vscale x 4 x i32> %data, i8* %
define void @masked_scatter_nxv4f16_zext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv4f16_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -515,19 +323,7 @@ define void @masked_scatter_nxv4f16_zext_offsets(<vscale x 4 x half> %data, i8*
define void @masked_scatter_nxv4bf16_zext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4bf16_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -539,19 +335,7 @@ define void @masked_scatter_nxv4bf16_zext_offsets(<vscale x 4 x bfloat> %data, i
define void @masked_scatter_nxv4f32_zext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv4f32_zext_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: uunpklo z3.d, z1.s
-; CHECK-NEXT: uunpkhi z1.d, z1.s
-; CHECK-NEXT: pfalse p1.b
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: add z2.d, z2.d, z3.d
-; CHECK-NEXT: uunpklo z3.d, z0.s
-; CHECK-NEXT: uunpkhi z0.d, z0.s
-; CHECK-NEXT: zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT: st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, uxtw]
; CHECK-NEXT: ret
%offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
index bb655a9ea90a..0f81e286f436 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
@@ -8,10 +8,7 @@
define void @masked_scatter_nxv2i8_unscaled_64bit_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i8_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i8*>
@@ -22,10 +19,7 @@ define void @masked_scatter_nxv2i8_unscaled_64bit_offsets(<vscale x 2 x i8> %dat
define void @masked_scatter_nxv2i16_unscaled_64bit_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -36,10 +30,7 @@ define void @masked_scatter_nxv2i16_unscaled_64bit_offsets(<vscale x 2 x i16> %d
define void @masked_scatter_nxv2i32_unscaled_64bit_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i32_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -50,10 +41,7 @@ define void @masked_scatter_nxv2i32_unscaled_64bit_offsets(<vscale x 2 x i32> %d
define void @masked_scatter_nxv2i64_unscaled_64bit_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2i64_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
@@ -64,10 +52,7 @@ define void @masked_scatter_nxv2i64_unscaled_64bit_offsets(<vscale x 2 x i64> %d
define void @masked_scatter_nxv2f16_unscaled_64bit_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
; CHECK-LABEL: masked_scatter_nxv2f16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
@@ -78,10 +63,7 @@ define void @masked_scatter_nxv2f16_unscaled_64bit_offsets(<vscale x 2 x half> %
define void @masked_scatter_nxv2bf16_unscaled_64bit_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2bf16_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
@@ -92,10 +74,7 @@ define void @masked_scatter_nxv2bf16_unscaled_64bit_offsets(<vscale x 2 x bfloat
define void @masked_scatter_nxv2f32_unscaled_64bit_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2f32_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1w { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
@@ -106,10 +85,7 @@ define void @masked_scatter_nxv2f32_unscaled_64bit_offsets(<vscale x 2 x float>
define void @masked_scatter_nxv2f64_unscaled_64bit_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
; CHECK-LABEL: masked_scatter_nxv2f64_unscaled_64bit_offsets:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z2.d, x0
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: add z1.d, z2.d, z1.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d]
; CHECK-NEXT: ret
%byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
%ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>