[llvm] [AArch64][SVE] Add codegen support for partial reduction lowering to wide add instructions (PR #114406)

James Chesterman via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 06:12:56 PST 2024


================
@@ -21783,6 +21788,64 @@ SDValue tryLowerPartialReductionToDot(SDNode *N,
   return DAG.getNode(Opcode, DL, ReducedType, NarrowOp, A, B);
 }
 
+SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
+                                          const AArch64Subtarget *Subtarget,
+                                          SelectionDAG &DAG) {
+
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+         getIntrinsicID(N) ==
+             Intrinsic::experimental_vector_partial_reduce_add &&
+         "Expected a partial reduction node");
+
+  if (!Subtarget->isSVEorStreamingSVEAvailable())
+    return SDValue();
+
+  SDLoc DL(N);
+
+  auto Acc = N->getOperand(1);
+  auto ExtInput = N->getOperand(2);
+
+  EVT AccVT = Acc.getValueType();
+  EVT AccElemVT = AccVT.getVectorElementType();
+
+  if (ExtInput.getValueType().getVectorElementType() != AccElemVT)
+    return SDValue();
+
+  unsigned ExtInputOpcode = ExtInput->getOpcode();
+  if (!ISD::isExtOpcode(ExtInputOpcode))
+    return SDValue();
+
+  auto Input = ExtInput->getOperand(0);
+  EVT InputVT = Input.getValueType();
+
+  if (!(InputVT == MVT::nxv4i32 && AccVT == MVT::nxv2i64) &&
+      !(InputVT == MVT::nxv8i16 && AccVT == MVT::nxv4i32) &&
+      !(InputVT == MVT::nxv16i8 && AccVT == MVT::nxv8i16))
+    return SDValue();
+
+  // To do this transformation, output element size needs to be double input
+  // element size, and output number of elements needs to be half the input
+  // number of elements
+  if (InputVT.getVectorElementType().getSizeInBits() * 2 !=
+          AccElemVT.getSizeInBits() ||
+      AccVT.getVectorElementCount() * 2 != InputVT.getVectorElementCount() ||
----------------
JamesChesterman wrote:

Done

https://github.com/llvm/llvm-project/pull/114406


More information about the llvm-commits mailing list