[llvm] [LV] Convert scatter w/uniform addr and mask being header mask to scalar store. (PR #172799)

Elvis Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 9 22:14:50 PST 2026


================
@@ -1602,6 +1602,90 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
   }
 }
 
+void VPlanTransforms::narrowScatters(VPlan &Plan, VPCostContext &Ctx,
+                                     VFRange &Range,
+                                     const bool &FoldTailWithEVL) {
+  if (Plan.hasScalarVFOnly())
+    return;
+
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
+      if (!isa<VPWidenStoreRecipe>(&R))
+        continue;
+      // Convert an unmasked or header masked scatter with an uniform address
+      // into extract-last-lane + scalar store.
+      // TODO: Add a profitability check comparing the cost of a scatter vs.
+      // extract + scalar store.
+      auto *WidenStoreR = dyn_cast<VPWidenStoreRecipe>(&R);
+      if (WidenStoreR && vputils::isSingleScalar(WidenStoreR->getAddr()) &&
+          !WidenStoreR->isConsecutive()) {
+        assert(!WidenStoreR->isReverse() &&
+               "Not consecutive memory recipes shouldn't be reversed");
+        VPValue *Mask = WidenStoreR->getMask();
+
+        // Convert the scatter to a scalar store if it is unmasked or header
+        // masked.
+        if (Mask && !vputils::isHeaderMask(Mask, Plan))
+          continue;
+
+        VPInstruction *Extract;
+        if (!Mask) {
+          Extract = new VPInstruction(VPInstruction::ExtractLastLane,
+                                      {WidenStoreR->getOperand(1)});
+        } else {
+          // If the mask is the header mask, this mask contains at least one
+          // active lane. So it is safe to convert the scatter to a scalar
+          // store.
+          if (!LoopVectorizationPlanner::getDecisionAndClampRange(
+                  [&](ElementCount VF) {
+                    InstructionCost ScatterCost =
+                        WidenStoreR->computeCost(VF, Ctx);
+                    // ConvertToScalarCost = LastActiveLane + ExtractElement +
+                    // scalar store.
+                    InstructionCost ScalarCost = 0;
+                    auto *ValTy = Ctx.Types.inferScalarType(
+                        WidenStoreR->getStoredValue());
+
+                    if (!FoldTailWithEVL)
+                      ScalarCost += Ctx.getLastActiveLaneCost(
+                          Type::getInt1Ty(Ctx.LLVMCtx), VF);
----------------
ElvisWang123 wrote:

Added a new function, `getCostForOpcodeAndTypes()`, which may not be as accurate as `getCostForOpcode()` (which uses the underlying value).

https://github.com/llvm/llvm-project/pull/172799


More information about the llvm-commits mailing list