[llvm] Handle VECREDUCE intrinsics in NVPTX backend (PR #136253)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 10:47:44 PDT 2025
================
@@ -1900,6 +1917,191 @@ static SDValue getPRMT(SDValue A, SDValue B, uint64_t Selector, SDLoc DL,
return getPRMT(A, B, DAG.getConstant(Selector, DL, MVT::i32), DL, DAG, Mode);
}
+/// A generic routine for constructing a tree reduction on a vector operand.
+/// This method groups elements bottom-up, progressively building each level.
+/// Unlike the shuffle reduction used in DAGTypeLegalizer and ExpandReductions,
+/// adjacent elements are combined first, leading to shorter live ranges. This
+/// approach makes the most sense if the shuffle reduction would use the same
+/// amount of registers.
+///
+/// The flags on the original reduction operation will be propagated to
+/// each scalar operation.
+static SDValue BuildTreeReduction(
+ const SmallVector<SDValue> &Elements, EVT EltTy,
+ ArrayRef<std::pair<unsigned /*NodeType*/, unsigned /*NumInputs*/>> Ops,
+ const SDLoc &DL, const SDNodeFlags Flags, SelectionDAG &DAG) {
+ // Build the reduction tree at each level, starting with all the elements.
+ SmallVector<SDValue> Level = Elements;
+
+ unsigned OpIdx = 0;
+ while (Level.size() > 1) {
+ // Try to reduce this level using the current operator.
+ const auto [DefaultScalarOp, DefaultGroupSize] = Ops[OpIdx];
+
+ // Build the next level by partially reducing all elements.
+ SmallVector<SDValue> ReducedLevel;
+ unsigned I = 0, E = Level.size();
+ for (; I + DefaultGroupSize <= E; I += DefaultGroupSize) {
+ // Reduce elements in groups of [DefaultGroupSize], as much as possible.
+ ReducedLevel.push_back(DAG.getNode(
+ DefaultScalarOp, DL, EltTy,
+ ArrayRef<SDValue>(Level).slice(I, DefaultGroupSize), Flags));
+ }
+
+ if (I < E) {
+ // Handle leftover elements.
+
+ if (ReducedLevel.empty()) {
+ // We didn't reduce anything at this level. We need to pick a smaller
+ // operator.
+ ++OpIdx;
+ assert(OpIdx < Ops.size() && "no smaller operators for reduction");
+ continue;
+ }
+
+ // We reduced some things but there's still more left, meaning the
+ // operator's number of inputs doesn't evenly divide this level size. Move
+ // these elements to the next level.
+ for (; I < E; ++I)
+ ReducedLevel.push_back(Level[I]);
+ }
+
+ // Process the next level.
+ Level = ReducedLevel;
+ }
+
+ return *Level.begin();
+}
+
+/// Lower reductions to either a sequence of operations or a tree if
+/// reassociations are allowed. This method will use larger operations like
+/// max3/min3 when the target supports them.
+SDValue NVPTXTargetLowering::LowerVECREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const SDNodeFlags Flags = Op->getFlags();
+ SDValue Vector = Op.getOperand(0);
+ SDValue Accumulator;
+
+ EVT EltTy = Vector.getValueType().getVectorElementType();
+ const bool CanUseMinMax3 = EltTy == MVT::f32 && STI.getSmVersion() >= 100 &&
+ STI.getPTXVersion() >= 88;
+
+ // A list of SDNode opcodes with equivalent semantics, sorted descending by
+ // number of inputs they take.
+ SmallVector<std::pair<unsigned /*Op*/, unsigned /*NumIn*/>, 2> ScalarOps;
+
+ // Whether we can lower to scalar operations in an arbitrary order.
+ bool IsAssociative = allowUnsafeFPMath(DAG.getMachineFunction());
+
+ // Whether the data type and operation can be represented with fewer ops and
+ // registers in a shuffle reduction.
+ bool PrefersShuffle;
+
+ switch (Op->getOpcode()) {
----------------
Prince781 wrote:
Done.
https://github.com/llvm/llvm-project/pull/136253
More information about the llvm-commits
mailing list