[llvm] [AArch64][SVE] Add codegen support for partial reduction lowering to wide add instructions (PR #114406)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 07:45:33 PDT 2024
================
@@ -21783,6 +21784,62 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
return DAG.getNode(Opcode, DL, ReducedType, NarrowOp, A, B);
}
+SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
+ const AArch64Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+
+ assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ getIntrinsicID(N) ==
+ Intrinsic::experimental_vector_partial_reduce_add &&
+ "Expected a partial reduction node");
+
+ bool Scalable = N->getValueType(0).isScalableVector();
+ if (Scalable && !Subtarget->isSVEorStreamingSVEAvailable())
+ return SDValue();
+
+ SDLoc DL(N);
+
+ auto Accumulator = N->getOperand(1);
+ auto ExtInput = N->getOperand(2);
+
+ EVT AccumulatorType = Accumulator.getValueType();
+ EVT AccumulatorElementType = AccumulatorType.getVectorElementType();
+
+ if (ExtInput.getValueType().getVectorElementType() != AccumulatorElementType)
+ return SDValue();
+
+ unsigned ExtInputOpcode = ExtInput->getOpcode();
+ if (!ISD::isExtOpcode(ExtInputOpcode))
+ return SDValue();
+
+ auto Input = ExtInput->getOperand(0);
+ EVT InputType = Input.getValueType();
+
+ // To do this transformation, output element size needs to be double input
+ // element size, and output number of elements needs to be half the input
+ // number of elements
+ if (!(InputType.getVectorElementType().getSizeInBits() * 2 ==
----------------
huntergr-arm wrote:
Can remove the `!(a == b)` in favour of `a != b`. `||` is lower precedence so we don't need extra parens here.
https://github.com/llvm/llvm-project/pull/114406
More information about the llvm-commits
mailing list