[PATCH] D88577: [AArch64] Generate udot for v16i8 sum reduction to i32
Vinay Madhusudan via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 30 08:24:38 PDT 2020
mivnay created this revision.
Herald added subscribers: llvm-commits, danielkiss, kristof.beyls.
Herald added a project: LLVM.
mivnay requested review of this revision.
Convert VECREDUCE_ADD( ZERO_EXTEND(v16i8_type) ) to VECREDUCE_ADD( UDOTv16i8(v16i8_type) ) whenever the result type is i32. This improves performance on one of the SPEC CPU 2017 benchmarks.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D88577
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
test/CodeGen/AArch64/neon-dot-product.ll
Index: test/CodeGen/AArch64/neon-dot-product.ll
===================================================================
--- test/CodeGen/AArch64/neon-dot-product.ll
+++ test/CodeGen/AArch64/neon-dot-product.ll
@@ -255,6 +255,17 @@
ret i32 %op.extra
}
+define i32 @test_udot_v16i8_2(i8* nocapture readonly %a1) { ; zext + reduce.add of one v16i8 load, no multiply
+entry:
+; CHECK-LABEL: test_udot_v16i8_2:
+; CHECK: udot {{v[0-9]+}}.4s, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
+ %0 = bitcast i8* %a1 to <16 x i8>*
+ %1 = load <16 x i8>, <16 x i8>* %0
+ %2 = zext <16 x i8> %1 to <16 x i32> ; widen every byte to i32
+ %3 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %2) ; sum all 16 widened lanes into one i32
+ ret i32 %3
+}
+
define i32 @test_sdot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) {
entry:
; CHECK-LABEL: test_sdot_v16i8:
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -770,6 +770,7 @@
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::GlobalAddress);
@@ -10939,6 +10940,40 @@
}
return SDValue();
}
+
+// Fold VECREDUCE_ADD(ZERO_EXTEND(v16i8 X)) producing i32 into
+// VECREDUCE_ADD(UDOT(v4i32 zeros, v16i8 ones, X)).
+// A dot product against a splat of 1 adds each group of four zero-extended
+// bytes into one i32 lane, so the remaining reduction runs over v4i32 instead
+// of v16i32. Returns an empty SDValue when the subtarget lacks the dot-product
+// feature or N does not match the pattern.
+static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
+                                          const AArch64Subtarget *ST) {
+  if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32)
+    return SDValue();
+
+  // Test the opcode before reading operand 0: the reduced value can be a node
+  // with no operands, and getOperand(0) asserts on such a node.
+  SDValue Op0 = N->getOperand(0);
+  if (Op0.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  // Only the fixed-width v16i8 -> v16i32 extension maps onto a 128-bit UDOT.
+  SDValue Src = Op0.getOperand(0);
+  if (Op0.getValueType() != MVT::v16i32 || Src.getValueType() != MVT::v16i8)
+    return SDValue();
+
+  SDLoc DL(Op0);
+  SDValue Ones = DAG.getConstant(1, DL, MVT::v16i8);
+  SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
+  // Build the udot intrinsic node rather than a MachineSDNode so the result
+  // still goes through legalization and instruction selection as usual.
+  SDValue ID = DAG.getConstant(Intrinsic::aarch64_neon_udot, DL, MVT::i32);
+  SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::v4i32, ID, Zeros,
+                            Ones, Src);
+  // Returning the new node is enough; the combiner replaces the uses of N.
+  return DAG.getNode(ISD::VECREDUCE_ADD, SDLoc(N), MVT::i32, Dot);
+}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -14622,6 +14657,8 @@
return performPostLD1Combine(N, DCI, true);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DAG);
+ case ISD::VECREDUCE_ADD:
+ return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88577.295295.patch
Type: text/x-patch
Size: 3142 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200930/0ab28169/attachment.bin>
More information about the llvm-commits
mailing list