[llvm-branch-commits] [llvm] 350247a - [AArch64] Rearrange mul(dup(sext/zext)) to mul(sext/zext(dup))
Nicholas Guy via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 6 08:15:17 PST 2021
Author: Nicholas Guy
Date: 2021-01-06T16:02:16Z
New Revision: 350247a93c07906300b79955ff882004a92ae368
URL: https://github.com/llvm/llvm-project/commit/350247a93c07906300b79955ff882004a92ae368
DIFF: https://github.com/llvm/llvm-project/commit/350247a93c07906300b79955ff882004a92ae368.diff
LOG: [AArch64] Rearrange mul(dup(sext/zext)) to mul(sext/zext(dup))
Performing this rearrangement allows for existing patterns
to match cases where the vector may be built after an extend,
instead of before.
Differential Revision: https://reviews.llvm.org/D91255
Added:
llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 41dc285a368d..40435c12ca3b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11705,9 +11705,152 @@ static bool IsSVECntIntrinsic(SDValue S) {
return false;
}
+/// Calculates what the pre-extend type is, based on the extension
+/// operation node provided by \p Extend.
+///
+/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
+/// pre-extend type is pulled directly from the operand, while other extend
+/// operations need a bit more inspection to get this information.
+///
+/// \param Extend The SDNode from the DAG that represents the extend operation
+/// \param DAG The SelectionDAG hosting the \p Extend node
+///
+/// \returns The type representing the \p Extend source type, or \p MVT::Other
+/// if no valid type can be determined
+static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
+ switch (Extend.getOpcode()) {
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ // Plain extends: the pre-extend type is simply the operand's type.
+ return Extend.getOperand(0).getValueType();
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ case ISD::SIGN_EXTEND_INREG: {
+ // These nodes carry the narrower "known" type as a VTSDNode in operand 1.
+ VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
+ if (!TypeNode)
+ return MVT::Other;
+ return TypeNode->getVT();
+ }
+ case ISD::AND: {
+ // An AND with an all-ones low mask behaves as a zero-extend from the
+ // masked width; recover that width from the constant mask operand.
+ ConstantSDNode *Constant =
+ dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
+ if (!Constant)
+ return MVT::Other;
+
+ // NOTE(review): getZExtValue() is 64-bit but is truncated to 32 bits
+ // here, so a 64-bit mask such as 0x1FFFFFFFF would alias UINT_MAX and
+ // be misreported as i32 -- confirm only <=32-bit masks can reach this.
+ uint32_t Mask = Constant->getZExtValue();
+
+ if (Mask == UCHAR_MAX)
+ return MVT::i8;
+ else if (Mask == USHRT_MAX)
+ return MVT::i16;
+ else if (Mask == UINT_MAX)
+ return MVT::i32;
+
+ return MVT::Other;
+ }
+ default:
+ return MVT::Other;
+ }
+
+ // Every switch case (including default) returns above, so this point is
+ // unreachable by construction; kept as a guard against future edits.
+ llvm_unreachable("Code path unhandled in calculatePreExtendType!");
+}
+
+/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
+/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
+static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
+ SelectionDAG &DAG) {
+
+ // Only shuffle_vector nodes are of interest: they are how a splat (dup)
+ // of an inserted scalar is represented in the DAG.
+ ShuffleVectorSDNode *ShuffleNode =
+ dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
+ if (!ShuffleNode)
+ return SDValue();
+
+ // Only handle shuffles that splat lane 0 of their first input.
+ if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
+ return SDValue();
+
+ // The splatted input must itself be an insert of a scalar into a vector.
+ SDValue InsertVectorElt = VectorShuffle.getOperand(0);
+
+ if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ SDValue InsertLane = InsertVectorElt.getOperand(2);
+ ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
+ // Ensures the insert is inserting into lane 0 (the lane being splatted).
+ if (!Constant || Constant->getZExtValue() != 0)
+ return SDValue();
+
+ // The inserted scalar must be (or act as) a sign/zero extension; AND with
+ // a low mask and the Assert* nodes count as extends here (see
+ // calculatePreExtendType).
+ SDValue Extend = InsertVectorElt.getOperand(1);
+ unsigned ExtendOpcode = Extend.getOpcode();
+
+ bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
+ ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
+ ExtendOpcode == ISD::AssertSext;
+ if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
+ ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
+ return SDValue();
+
+ EVT TargetType = VectorShuffle.getValueType();
+ EVT PreExtendType = calculatePreExtendType(Extend, DAG);
+
+ // Restrict to the 128-bit vector types for which widening-multiply
+ // (smull/umull) patterns exist, and bail if the source type is unknown.
+ if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
+ TargetType != MVT::v2i64) ||
+ (PreExtendType == MVT::Other))
+ return SDValue();
+
+ // Rebuild the dup at the narrow element type, same element count.
+ EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
+
+ if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
+ return SDValue();
+
+ // The extend must exactly double the element width, matching the
+ // smull/umull form (e.g. v8i8 -> v8i16).
+ if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
+ return SDValue();
+
+ SDLoc DL(VectorShuffle);
+
+ // Insert the un-extended scalar into lane 0 of an undef narrow vector.
+ SDValue InsertVectorNode = DAG.getNode(
+ InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
+ Extend.getOperand(0), DAG.getConstant(0, DL, MVT::i64));
+
+ // Value-initialized to all zeroes: a splat-of-lane-0 shuffle mask.
+ std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
+
+ SDValue VectorShuffleNode =
+ DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
+ DAG.getUNDEF(PreExtendVT), ShuffleMask);
+
+ // Extend the whole splatted vector instead of the scalar.
+ // NOTE(review): plain ISD::SIGN_EXTEND/ZERO_EXTEND normally take a single
+ // operand; the trailing getValueType(TargetType) operand looks like a
+ // SIGN_EXTEND_INREG-style VT operand -- confirm it is intended here.
+ SDValue ExtendNode =
+ DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, TargetType,
+ VectorShuffleNode, DAG.getValueType(TargetType));
+
+ return ExtendNode;
+}
+
+/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
+/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
+static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
+ // If the value type isn't a vector, none of the operands are going to be dups
+ if (!Mul->getValueType(0).isVector())
+ return SDValue();
+
+ // Try to rewrite each operand independently; a null result means that
+ // operand did not match the dup(extend) pattern and is left untouched.
+ SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
+ SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
+
+ // Neither operand was changed; don't make any further changes
+ if (!Op0 && !Op1)
+ return SDValue();
+
+ // Rebuild the multiply, substituting the rewritten operand(s).
+ SDLoc DL(Mul);
+ return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
+ Op0 ? Op0 : Mul->getOperand(0),
+ Op1 ? Op1 : Mul->getOperand(1));
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
+
+ if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
+ return Ext;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
new file mode 100644
index 000000000000..082e2db52705
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
+
+define <vscale x 2 x i16> @dupsext_v2i8_v2i16(i8 %src, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: dupsext_v2i8_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i16
+ %broadcast.splatinsert = insertelement <vscale x 2 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <vscale x 2 x i16> %broadcast.splatinsert, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nsw <vscale x 2 x i16> %broadcast.splat, %b
+ ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 4 x i16> @dupsext_v4i8_v4i16(i8 %src, <vscale x 4 x i16> %b) {
+; CHECK-LABEL: dupsext_v4i8_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i16
+ %broadcast.splatinsert = insertelement <vscale x 4 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <vscale x 4 x i16> %broadcast.splatinsert, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %out = mul nsw <vscale x 4 x i16> %broadcast.splat, %b
+ ret <vscale x 4 x i16> %out
+}
+
+define <vscale x 8 x i16> @dupsext_v8i8_v8i16(i8 %src, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: dupsext_v8i8_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i16
+ %broadcast.splatinsert = insertelement <vscale x 8 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <vscale x 8 x i16> %broadcast.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %out = mul nsw <vscale x 8 x i16> %broadcast.splat, %b
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 2 x i32> @dupsext_v2i8_v2i32(i8 %src, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: dupsext_v2i8_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 2 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x i32> %broadcast.splatinsert, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nsw <vscale x 2 x i32> %broadcast.splat, %b
+ ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 4 x i32> @dupsext_v4i8_v4i32(i8 %src, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: dupsext_v4i8_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 4 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = mul nsw <vscale x 4 x i32> %broadcast.splat, %b
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @dupsext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: dupsext_v2i8_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtb x8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i64
+ %broadcast.splatinsert = insertelement <vscale x 2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <vscale x 2 x i64> %broadcast.splatinsert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nsw <vscale x 2 x i64> %broadcast.splat, %b
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i32> @dupsext_v2i16_v2i32(i16 %src, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: dupsext_v2i16_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = sext i16 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 2 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x i32> %broadcast.splatinsert, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nsw <vscale x 2 x i32> %broadcast.splat, %b
+ ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 4 x i32> @dupsext_v4i16_v4i32(i16 %src, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: dupsext_v4i16_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+entry:
+ %in = sext i16 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 4 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = mul nsw <vscale x 4 x i32> %broadcast.splat, %b
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @dupsext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: dupsext_v2i16_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxth x8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = sext i16 %src to i64
+ %broadcast.splatinsert = insertelement <vscale x 2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <vscale x 2 x i64> %broadcast.splatinsert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nsw <vscale x 2 x i64> %broadcast.splat, %b
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @dupsext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: dupsext_v2i32_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = sext i32 %src to i64
+ %broadcast.splatinsert = insertelement <vscale x 2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <vscale x 2 x i64> %broadcast.splatinsert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nsw <vscale x 2 x i64> %broadcast.splat, %b
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i16> @dupzext_v2i8_v2i16(i8 %src, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: dupzext_v2i8_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i16
+ %broadcast.splatinsert = insertelement <vscale x 2 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <vscale x 2 x i16> %broadcast.splatinsert, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nuw <vscale x 2 x i16> %broadcast.splat, %b
+ ret <vscale x 2 x i16> %out
+}
+
+define <vscale x 4 x i16> @dupzext_v4i8_v4i16(i8 %src, <vscale x 4 x i16> %b) {
+; CHECK-LABEL: dupzext_v4i8_v4i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i16
+ %broadcast.splatinsert = insertelement <vscale x 4 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <vscale x 4 x i16> %broadcast.splatinsert, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %out = mul nuw <vscale x 4 x i16> %broadcast.splat, %b
+ ret <vscale x 4 x i16> %out
+}
+
+define <vscale x 8 x i16> @dupzext_v8i8_v8i16(i8 %src, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: dupzext_v8i8_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i16
+ %broadcast.splatinsert = insertelement <vscale x 8 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <vscale x 8 x i16> %broadcast.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %out = mul nuw <vscale x 8 x i16> %broadcast.splat, %b
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 2 x i32> @dupzext_v2i8_v2i32(i8 %src, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: dupzext_v2i8_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 2 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x i32> %broadcast.splatinsert, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nuw <vscale x 2 x i32> %broadcast.splat, %b
+ ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 4 x i32> @dupzext_v4i8_v4i32(i8 %src, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: dupzext_v4i8_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 4 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = mul nuw <vscale x 4 x i32> %broadcast.splat, %b
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @dupzext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: dupzext_v2i8_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x8, x0, #0xff
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i64
+ %broadcast.splatinsert = insertelement <vscale x 2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <vscale x 2 x i64> %broadcast.splatinsert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nuw <vscale x 2 x i64> %broadcast.splat, %b
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i32> @dupzext_v2i16_v2i32(i16 %src, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: dupzext_v2i16_v2i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = zext i16 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 2 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 2 x i32> %broadcast.splatinsert, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nuw <vscale x 2 x i32> %broadcast.splat, %b
+ ret <vscale x 2 x i32> %out
+}
+
+define <vscale x 4 x i32> @dupzext_v4i16_v4i32(i16 %src, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: dupzext_v4i16_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+entry:
+ %in = zext i16 %src to i32
+ %broadcast.splatinsert = insertelement <vscale x 4 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %out = mul nuw <vscale x 4 x i32> %broadcast.splat, %b
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @dupzext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: dupzext_v2i16_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x8, x0, #0xffff
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = zext i16 %src to i64
+ %broadcast.splatinsert = insertelement <vscale x 2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <vscale x 2 x i64> %broadcast.splatinsert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nuw <vscale x 2 x i64> %broadcast.splat, %b
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @dupzext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: dupzext_v2i32_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+entry:
+ %in = zext i32 %src to i64
+ %broadcast.splatinsert = insertelement <vscale x 2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <vscale x 2 x i64> %broadcast.splatinsert, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %out = mul nuw <vscale x 2 x i64> %broadcast.splat, %b
+ ret <vscale x 2 x i64> %out
+}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
new file mode 100644
index 000000000000..07ac3c87d143
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
+
+; Supported combines
+
+define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
+; CHECK-LABEL: dupsext_v8i8_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: dup v1.8b, w0
+; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i16
+ %ext.b = sext <8 x i8> %b to <8 x i16>
+ %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+ %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
+ ret <8 x i16> %out
+}
+
+define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
+; CHECK-LABEL: dupzext_v8i8_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: dup v1.8b, w0
+; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b
+; CHECK-NEXT: ret
+entry:
+ %in = zext i8 %src to i16
+ %ext.b = zext <8 x i8> %b to <8 x i16>
+ %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+ %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
+ ret <8 x i16> %out
+}
+
+define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
+; CHECK-LABEL: dupsext_v4i16_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: dup v1.4h, w0
+; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %in = sext i16 %src to i32
+ %ext.b = sext <4 x i16> %b to <4 x i32>
+ %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
+ ret <4 x i32> %out
+}
+
+define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
+; CHECK-LABEL: dupzext_v4i16_v4i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: dup v1.4h, w0
+; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h
+; CHECK-NEXT: ret
+entry:
+ %in = zext i16 %src to i32
+ %ext.b = zext <4 x i16> %b to <4 x i32>
+ %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
+ %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
+ ret <4 x i32> %out
+}
+
+define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
+; CHECK-LABEL: dupsext_v2i32_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: dup v1.2s, w0
+; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %in = sext i32 %src to i64
+ %ext.b = sext <2 x i32> %b to <2 x i64>
+ %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
+ ret <2 x i64> %out
+}
+
+define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
+; CHECK-LABEL: dupzext_v2i32_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: dup v1.2s, w0
+; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %in = zext i32 %src to i64
+ %ext.b = zext <2 x i32> %b to <2 x i64>
+ %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
+ ret <2 x i64> %out
+}
+
+; Unsupported combines
+
+define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
+; CHECK-LABEL: dupsext_v2i8_v2i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i16
+ %ext.b = sext <2 x i8> %b to <2 x i16>
+ %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
+ %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
+ %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
+ ret <2 x i16> %out
+}
+
+define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
+; CHECK-LABEL: dupzext_v2i16_v2i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: and x8, x0, #0xffff
+; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x8, x10
+; CHECK-NEXT: mul x8, x8, x9
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: ret
+entry:
+ %in = zext i16 %src to i64
+ %ext.b = zext <2 x i16> %b to <2 x i64>
+ %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
+ %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
+ ret <2 x i64> %out
+}
+
+; dupsext_v4i8_v4i16
+; dupsext_v2i8_v2i32
+; dupsext_v4i8_v4i32
+; dupsext_v2i8_v2i64
+; dupsext_v2i16_v2i32
+; dupsext_v2i16_v2i64
+; dupzext_v2i8_v2i16
+; dupzext_v4i8_v4i16
+; dupzext_v2i8_v2i32
+; dupzext_v4i8_v4i32
+; dupzext_v2i8_v2i64
+; dupzext_v2i16_v2i32
+; dupzext_v2i16_v2i64
+
+; Unsupported states
+
+define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
+; CHECK-LABEL: nonsplat_shuffleinsert:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %in = sext i8 %src to i16
+ %ext.b = sext <8 x i8> %b to <8 x i16>
+ %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
+ %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
+ %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
+ ret <8 x i16> %out
+}
+
+define <8 x i16> @missing_insert(<8 x i8> %b) {
+; CHECK-LABEL: missing_insert:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT: mul v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+entry:
+ %ext.b = sext <8 x i8> %b to <8 x i16>
+ %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
+ %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
+ ret <8 x i16> %out
+}
More information about the llvm-branch-commits
mailing list