[llvm] [LV] Vectorize FMax w/o fast-math flags. (PR #146711)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 13 04:17:50 PDT 2025
================
@@ -628,3 +628,118 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
}
}
+
+bool VPlanTransforms::handleFMaxReductionsWithoutFastMath(VPlan &Plan) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPReductionPHIRecipe *RedPhiR = nullptr;
+ VPRecipeWithIRFlags *MaxOp = nullptr;
+ VPWidenIntOrFpInductionRecipe *WideIV = nullptr;
+
+ // Check if there are any FCmpOGTSelect reductions using wide selects that we
+ // can fix up. To do so, we also need a wide canonical IV to keep track of
+ // the indices of the max values.
+ for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
+ // We need a wide canonical IV
+ if (auto *CurIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+ if (!CurIV->isCanonical())
+ continue;
+ WideIV = CurIV;
+ continue;
+ }
+
+ // And a single FCmpOGTSelect reduction phi.
+ // TODO: Support FMin reductions as well.
+ auto *CurRedPhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!CurRedPhiR)
+ continue;
+ if (RedPhiR)
+ return false;
+ if (CurRedPhiR->getRecurrenceKind() != RecurKind::FCmpOGTSelect ||
+ CurRedPhiR->isInLoop() || CurRedPhiR->isOrdered())
+ continue;
+ RedPhiR = CurRedPhiR;
+
+ // MaxOp feeding the reduction phi must be a select (either wide or a
+ // replicate recipe), where the phi is the last operand, and the compare
+ // predicate is strict. This ensures NaNs won't get propagated unless the
+ // initial value is NaN
+ VPRecipeBase *Inc = RedPhiR->getBackedgeValue()->getDefiningRecipe();
+ auto *RepR = dyn_cast<VPReplicateRecipe>(Inc);
+ if (!isa<VPWidenSelectRecipe>(Inc) &&
+ !(RepR && (isa<SelectInst>(RepR->getUnderlyingInstr()))))
+ return false;
+
+ MaxOp = cast<VPRecipeWithIRFlags>(Inc);
+ auto *Cmp = cast<VPRecipeWithIRFlags>(MaxOp->getOperand(0));
+ if (MaxOp->getOperand(1) == RedPhiR ||
+ !CmpInst::isStrictPredicate(Cmp->getPredicate()))
+ return false;
+ }
+
+ // Nothing to do.
+ if (!RedPhiR)
+ return true;
+
+ // A wide canonical IV is currently required.
+ // TODO: Create an induction if no suitable existing one is available.
+ if (!WideIV)
+ return false;
+
+ // Create a reduction that tracks the first indices where the latest maximum
+ // value has been selected. This is later used to select the max value from
+ // the partial reductions in a way that correctly handles signed zeros and
+ // NaNs in the input.
+ // Note that we do not need to check if the induction may hit the sentinel
+ // value. If the sentinel value gets hit, the final reduction value is at the
+ // last index or the maximum was never set and all lanes contain the start
+ // value. In either case, the correct value is selected.
+ unsigned IVWidth =
+ VPTypeAnalysis(Plan).inferScalarType(WideIV)->getScalarSizeInBits();
+ LLVMContext &Ctx = Plan.getScalarHeader()->getIRBasicBlock()->getContext();
+ VPValue *UMinSentinel =
+ Plan.getOrAddLiveIn(ConstantInt::get(Ctx, APInt::getMaxValue(IVWidth)));
+ auto *IdxPhi = new VPReductionPHIRecipe(nullptr, RecurKind::FindFirstIVUMin,
+ *UMinSentinel, false, false, 1);
+ IdxPhi->insertBefore(RedPhiR);
+ auto *MinIdxSel = new VPInstruction(Instruction::Select,
+ {MaxOp->getOperand(0), WideIV, IdxPhi});
+ MinIdxSel->insertAfter(MaxOp);
+ IdxPhi->addOperand(MinIdxSel);
+
+ // Find the first index with the maximum value. This is used to extract the
----------------
fhahn wrote:
Done thanks
https://github.com/llvm/llvm-project/pull/146711
More information about the llvm-commits
mailing list