[llvm] Handle VECREDUCE intrinsics in NVPTX backend (PR #136253)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 01:15:47 PDT 2025
================
@@ -2038,6 +2063,259 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}
+/// A generic routine for constructing a tree reduction on a vector operand.
+/// This method groups elements bottom-up, progressively building each level.
+/// Unlike the shuffle reduction used in DAGTypeLegalizer and ExpandReductions,
+/// adjacent elements are combined first, leading to shorter live ranges. This
+/// approach makes the most sense when the shuffle reduction would use the
+/// same number of registers.
+///
+/// The flags on the original reduction operation will be propagated to
+/// each scalar operation.
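+///
+/// For example, with a single 2-input operator, an 8-element reduction is
+/// built as (((e0 + e1) + (e2 + e3)) + ((e4 + e5) + (e6 + e7))), whereas a
+/// shuffle reduction would first combine element i with element i + 4.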
+static SDValue BuildTreeReduction(
+    const SmallVector<SDValue> &Elements, EVT EltTy,
+    ArrayRef<std::pair<unsigned /*NodeType*/, unsigned /*NumInputs*/>> Ops,
+    const SDLoc &DL, const SDNodeFlags Flags, SelectionDAG &DAG) {
+  // Build the reduction tree at each level, starting with all the elements.
+  SmallVector<SDValue> Level = Elements;
+
+  unsigned OpIdx = 0;
+  while (Level.size() > 1) {
+    // Try to reduce this level using the current operator.
+    const auto [DefaultScalarOp, DefaultGroupSize] = Ops[OpIdx];
+
+    // Build the next level by partially reducing all elements.
+    SmallVector<SDValue> ReducedLevel;
+    unsigned I = 0, E = Level.size();
+    for (; I + DefaultGroupSize <= E; I += DefaultGroupSize) {
+      // Reduce full groups of DefaultGroupSize elements, as many times as
+      // possible.
+      ReducedLevel.push_back(DAG.getNode(
+          DefaultScalarOp, DL, EltTy,
+          ArrayRef<SDValue>(Level).slice(I, DefaultGroupSize), Flags));
+    }
+
+    if (I < E) {
+      // Handle leftover elements.
+
+      if (ReducedLevel.empty()) {
+        // We didn't reduce anything at this level. Fall back to a smaller
+        // operator (e.g., from a 3-input form to a 2-input form).
+        ++OpIdx;
+        assert(OpIdx < Ops.size() && "no smaller operators for reduction");
+        continue;
+      }
+
+      // We reduced some elements, but a few are left over because the
+      // operator's number of inputs doesn't evenly divide the level size.
+      // Carry the remaining elements over to the next level.
+      for (; I < E; ++I)
+        ReducedLevel.push_back(Level[I]);
+    }
+
+    // Process the next level.
+    Level = ReducedLevel;
+  }
+
+  return Level.front();
+}
+
+/// Lower reductions to either a sequence of operations or a tree if
+/// reassociation is allowed. This method will use larger operations like
+/// max3/min3 when the target supports them.
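+///
+/// For example, assuming a 3-input max operation is available, an fmax
+/// reduction of four elements may be built as max(max3(e0, e1, e2), e3):
+/// the first level consumes one full group of three and carries e3 over,
+/// and the final level falls back to the 2-input operator.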
+SDValue NVPTXTargetLowering::LowerVECREDUCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const SDNodeFlags Flags = Op->getFlags();
+  SDValue Vector;
+  SDValue Accumulator;
+
+  if (Op->getOpcode() == ISD::VECREDUCE_SEQ_FADD ||
+      Op->getOpcode() == ISD::VECREDUCE_SEQ_FMUL) {
+    // Sequential reductions take the accumulator as their first operand.
+    Accumulator = Op.getOperand(0);
+    Vector = Op.getOperand(1);
+  } else {
+    // All other reductions take only the vector operand.
+    Vector = Op.getOperand(0);
+  }
+
+  EVT EltTy = Vector.getValueType().getVectorElementType();
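+  // f32 min3/max3 requires sm_100+ and PTX ISA 8.8+.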
+  const bool CanUseMinMax3 = EltTy == MVT::f32 && STI.getSmVersion() >= 100 &&
+                             STI.getPTXVersion() >= 88;
+
+  // A list of SDNode opcodes with equivalent semantics, sorted descending by
+  // number of inputs they take.
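+  // For example, an fmax reduction with 3-input support would list the
+  // target's 3-input max node first, then {ISD::FMAXNUM, 2}.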
+  SmallVector<std::pair<unsigned /*Op*/, unsigned /*NumIn*/>, 2> ScalarOps;
+
+  // Whether we can lower to scalar operations in an arbitrary order.
+  bool IsAssociative = allowUnsafeFPMath(DAG.getMachineFunction());
+
+  // Whether the data type and operation can be represented with fewer ops and
+  // registers in a shuffle reduction.
+  bool PrefersShuffle;
+
+  switch (Op->getOpcode()) {
+  case ISD::VECREDUCE_FADD:
+  case ISD::VECREDUCE_SEQ_FADD:
+    ScalarOps = {{ISD::FADD, 2}};
+    IsAssociative |= Op->getOpcode() == ISD::VECREDUCE_FADD;
+    // Prefer add.{,b}f16x2 for v2{,b}f16
+    PrefersShuffle = EltTy == MVT::f16 || EltTy == MVT::bf16;
----------------
Prince781 wrote:
Done!
https://github.com/llvm/llvm-project/pull/136253
More information about the llvm-commits
mailing list