[PATCH] D40136: [ARM] Fix missed vpadd combine opportunity.
Yvan Roux via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 09:30:38 PST 2017
yroux created this revision.
Herald added subscribers: kristof.beyls, aemerson.
This fixes Bug 32999 <https://bugs.llvm.org/show_bug.cgi?id=32999>.
Adds pattern recognition for ADD(EXTRACT_SUBVECTOR(VUZP.0), EXTRACT_SUBVECTOR(VUZP.1)), which is now combined into a VPADD.
The regression tests (x86 host) and the nightly test-suite (x86 and ARM hosts)
pass without regressions.
https://reviews.llvm.org/D40136
Files:
lib/Target/ARM/ARMISelLowering.cpp
test/CodeGen/ARM/vpadd.ll
Index: test/CodeGen/ARM/vpadd.ll
===================================================================
--- test/CodeGen/ARM/vpadd.ll
+++ test/CodeGen/ARM/vpadd.ll
@@ -385,6 +385,26 @@
ret void
}
+; PR32999: an add of the even lanes and the odd lanes of the same vector (each taken via shufflevector) should combine vuzp+add->vpadd
+define void @pr32999(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
+; CHECK-LABEL: pr32999:
+; CHECK: @ BB#0:
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vmovl.u8 q8, d16
+; CHECK-NEXT: vpadd.i16 d16, d16, d17
+; CHECK-NEXT: vstr d16, [r1]
+; CHECK-NEXT: mov pc, lr
+ %tmp = load <16 x i8>, <16 x i8>* %cbcr
+ %tmp1 = zext <16 x i8> %tmp to <16 x i16> ; widen every i8 lane to i16
+ %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> ; even lanes 0,2,4,6 of %tmp1; upper four result lanes are undef
+ %tmp2a = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep only the four defined (low) lanes
+ %tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> ; odd lanes 1,3,5,7 of %tmp1; upper four result lanes are undef
+ %tmp3a = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep only the four defined (low) lanes
+ %add = add <4 x i16> %tmp2a, %tmp3a ; lane i = %tmp1[2*i] + %tmp1[2*i+1], i.e. a pairwise add over the first eight lanes
+ store <4 x i16> %add, <4 x i16>* %X, align 8
+ ret void
+}
+
; Combine vuzp+vaddl->vpaddl
define void @addCombineToVPADDLq_s16(<8 x i16> *%cbcr, <4 x i32> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDLq_s16:
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -9420,6 +9420,48 @@
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
}
+static SDValue AddCombineVUZPToVPADD(SDNode *N, SDValue N0, SDValue N1, // Tries to rewrite the add N as an arm_neon_vpadd intrinsic; N0/N1 are presumably N's two operands (verify against the caller).
+ TargetLowering::DAGCombinerInfo &DCI, // Only DCI.DAG is used, to build the replacement nodes.
+ const ARMSubtarget *Subtarget) { // Subtarget is not referenced in this function body.
+ // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD. An empty SDValue is returned whenever the pattern does not match.
+ if (!N->getValueType(0).is64BitVector())
+ return SDValue();
+
+ // Check for ADD(EXTR_SUBV(VUZP.0), EXTR_SUBV(VUZP.1)): first, both operands must be EXTRACT_SUBVECTOR nodes.
+ if (!(N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0); // vector that N0 extracts from
+ SDValue N10 = N1.getOperand(0); // vector that N1 extracts from
+
+ if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() || // both sources must be values of one and the same VUZP shuffle node...
+ N00 == N10) // ...but not the same SDValue, i.e. they must differ in result number
+ return SDValue(); // NOTE(review): the index operands of the two EXTRACT_SUBVECTORs are never inspected - confirm the fold is still valid when they are non-zero or differ.
+
+ // Generate vpadd with the right subvectors: two VT-sized pieces taken from the VUZP's first input.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(N);
+ SDNode *Unzip = N00.getNode();
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue extract0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, // elements [0, NumElts) of Unzip's operand 0
+ Unzip->getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue extract1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, // elements [NumElts, 2*NumElts) of Unzip's operand 0; assumes that operand has at least 2*NumElts elements - TODO confirm. Unzip's operand 1 is not used.
+ Unzip->getOperand(0),
+ DAG.getIntPtrConstant(NumElts, dl));
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl, // first operand of the intrinsic node is the intrinsic ID as a pointer-typed constant
+ TLI.getPointerTy(DAG.getDataLayout())));
+ Ops.push_back(extract0);
+ Ops.push_back(extract1);
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); // vpadd(extract0, extract1), with the same type as the original add
+}
+
static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -9926,6 +9968,9 @@
if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
return Result;
+ if (SDValue Result = AddCombineVUZPToVPADD(N, N0, N1, DCI, Subtarget))
+ return Result;
+
// Attempt to create vpaddl for this add.
if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
return Result;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D40136.123199.patch
Type: text/x-patch
Size: 3804 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171116/f9948b22/attachment-0001.bin>
More information about the llvm-commits
mailing list