[llvm] [LV] Convert scatter w/uniform addr and mask being header mask to scalar store. (PR #172799)
Elvis Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 9 22:14:50 PST 2026
================
@@ -1602,6 +1602,90 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
}
}
+void VPlanTransforms::narrowScatters(VPlan &Plan, VPCostContext &Ctx,
+ VFRange &Range,
+ const bool &FoldTailWithEVL) {
+ if (Plan.hasScalarVFOnly())
+ return;
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
+ if (!isa<VPWidenStoreRecipe>(&R))
+ continue;
+ // Convert an unmasked or header masked scatter with an uniform address
+ // into extract-last-lane + scalar store.
+ // TODO: Add a profitability check comparing the cost of a scatter vs.
+ // extract + scalar store.
+ auto *WidenStoreR = dyn_cast<VPWidenStoreRecipe>(&R);
+ if (WidenStoreR && vputils::isSingleScalar(WidenStoreR->getAddr()) &&
+ !WidenStoreR->isConsecutive()) {
+ assert(!WidenStoreR->isReverse() &&
+ "Not consecutive memory recipes shouldn't be reversed");
+ VPValue *Mask = WidenStoreR->getMask();
+
+ // Convert the scatter to a scalar store if it is unmasked or header
+ // masked.
+ if (Mask && !vputils::isHeaderMask(Mask, Plan))
+ continue;
+
+ VPInstruction *Extract;
+ if (!Mask) {
+ Extract = new VPInstruction(VPInstruction::ExtractLastLane,
+ {WidenStoreR->getOperand(1)});
+ } else {
+ // If the mask is the header mask, this mask contains at least one
+ // active lane. So it is safe to convert the scatter to a scalar
+ // store.
+ if (!LoopVectorizationPlanner::getDecisionAndClampRange(
+ [&](ElementCount VF) {
+ InstructionCost ScatterCost =
+ WidenStoreR->computeCost(VF, Ctx);
+ // ConvertToScalarCost = LastActiveLane + ExtractElement +
+ // scalar store.
+ InstructionCost ScalarCost = 0;
+ auto *ValTy = Ctx.Types.inferScalarType(
+ WidenStoreR->getStoredValue());
+
+ if (!FoldTailWithEVL)
+ ScalarCost += Ctx.getLastActiveLaneCost(
+ Type::getInt1Ty(Ctx.LLVMCtx), VF);
----------------
ElvisWang123 wrote:
Added a new function `getCostForOpcodeAndTypes()`, which may not be as accurate as `getCostForOpcode()` (which uses the underlying value).
https://github.com/llvm/llvm-project/pull/172799
More information about the llvm-commits
mailing list