[llvm] [AArch64][SVE] Add codegen support for partial reduction lowering to wide add instructions (PR #114406)
James Chesterman via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 06:12:56 PST 2024
================
@@ -21783,6 +21788,64 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
return DAG.getNode(Opcode, DL, ReducedType, NarrowOp, A, B);
}
+SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
+ const AArch64Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+
+ assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ getIntrinsicID(N) ==
+ Intrinsic::experimental_vector_partial_reduce_add &&
+ "Expected a partial reduction node");
+
+ if (!Subtarget->isSVEorStreamingSVEAvailable())
+ return SDValue();
+
+ SDLoc DL(N);
+
+ auto Acc = N->getOperand(1);
+ auto ExtInput = N->getOperand(2);
+
+ EVT AccVT = Acc.getValueType();
+ EVT AccElemVT = AccVT.getVectorElementType();
+
+ if (ExtInput.getValueType().getVectorElementType() != AccElemVT)
+ return SDValue();
+
+ unsigned ExtInputOpcode = ExtInput->getOpcode();
+ if (!ISD::isExtOpcode(ExtInputOpcode))
+ return SDValue();
+
+ auto Input = ExtInput->getOperand(0);
+ EVT InputVT = Input.getValueType();
+
+ if (!(InputVT == MVT::nxv4i32 && AccVT == MVT::nxv2i64) &&
+ !(InputVT == MVT::nxv8i16 && AccVT == MVT::nxv4i32) &&
+ !(InputVT == MVT::nxv16i8 && AccVT == MVT::nxv8i16))
+ return SDValue();
+
+ // To do this transformation, output element size needs to be double input
+ // element size, and output number of elements needs to be half the input
+ // number of elements
+ if (InputVT.getVectorElementType().getSizeInBits() * 2 !=
+ AccElemVT.getSizeInBits() ||
+ AccVT.getVectorElementCount() * 2 != InputVT.getVectorElementCount() ||
----------------
JamesChesterman wrote:
Done
https://github.com/llvm/llvm-project/pull/114406
More information about the llvm-commits
mailing list