[llvm] [LV][VPlan] Add initial support for CSA vectorization (PR #106560)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 3 12:00:47 PDT 2024


================
@@ -8540,6 +8599,107 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
   return tryToWiden(Instr, Operands, VPBB);
 }
 
+/// Add the CSA recipes that can be created before any instruction of the input
+/// IR is processed and introduced into VPlan, and register a state per CSA.
+static void
+addCSAPreprocessRecipes(const LoopVectorizationLegality::CSAList &CSAs,
+                        Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
+                        VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
+                        VPlan &Plan) {
+
+  // Don't build the full CSA for the scalar VF, ElementCount::getFixed(1).
+  bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
+      [&](ElementCount VF) { return VF.isScalar(); }, Range);
+
+  for (const auto &CSA : CSAs) {
+    VPValue *VPInitScalar = Plan.getOrAddLiveIn(
+        CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
+
+    // The scalar VF builds the scalar version of the loop. In that case,
+    // no mask maintenance or extraction in the middle block is needed.
+    if (IsScalarVF) {
+      VPCSAState *S = new VPCSAState(VPInitScalar);
+      Plan.addCSAState(CSA.first, S);
+      continue;
+    }
+
+    auto *VPInitMask =
+        new VPInstruction(VPInstruction::CSAInitMask, {}, DL, "csa.init.mask");
+    auto *VPInitData = new VPInstruction(VPInstruction::CSAInitData,
+                                         {VPInitScalar}, DL, "csa.init.data");
+    PreheaderVPBB->appendRecipe(VPInitMask);
+    PreheaderVPBB->appendRecipe(VPInitData);
+
+    auto *VPMaskPhi = new VPInstruction(VPInstruction::CSAMaskPhi, {VPInitMask},
+                                        DL, "csa.mask.phi");
+    HeaderVPBB->appendRecipe(VPMaskPhi);
+
+    auto *S = new VPCSAState(VPInitScalar, VPInitData, VPMaskPhi);
+    Plan.addCSAState(CSA.first, S);
+  }
+}
+
+/// Add CSA Recipes that must occur after each instruction in the input IR
+/// is processed and introduced into VPlan.
+static void
+addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
+                         const LoopVectorizationLegality::CSAList &CSAs,
+                         VPBasicBlock *MiddleVPBB, DebugLoc DL, VFRange &Range,
+                         VPlan &Plan) {
+  // Don't build CSA for VF=ElementCount::getFixed(1)
+  if (LoopVectorizationPlanner::getDecisionAndClampRange(
+          [&](ElementCount VF) { return VF.isScalar(); }, Range))
+    return;
+
+  for (const auto &CSA : CSAs) {
+    VPCSAState *CSAState = Plan.getCSAStates().find(CSA.first)->second;
+    VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate();
+
+    assert(VPDataUpdate &&
+           "VPDataUpdate must have been introduced prior to postprocess");
+    assert(CSA.second.getCond() &&
+           "CSADescriptor must know how to describe the condition");
+    auto GetVPValue = [&](Value *I) {
+      return RecipeBuilder.getRecipe(cast<Instruction>(I))->getVPSingleValue();
+    };
+    VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
+    VPValue *VPInitScalar = CSAState->getVPInitScalar();
+
+    // The CSA optimization wants to use a condition such that when it is
+    // true, a new value is assigned. However, it is possible that a true lane
+    // in WidenedCond corresponds to selection of the initial value instead.
+    // In that case, we must use the negation of WidenedCond.
+    // i.e. select cond new_val old_val versus select cond.not old_val new_val
+    VPValue *CondToUse = WidenedCond;
+    if (cast<SelectInst>(CSA.second.getAssignment())->getTrueValue() ==
+        CSA.first) {
+      auto *VPNotCond = new VPInstruction(VPInstruction::Not, WidenedCond, DL);
+      VPNotCond->insertBefore(
+          GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
----------------
michaelmaitland wrote:

Updated to use VPBuilder to create the VPInstructions, and used the builder to insert them where appropriate.

https://github.com/llvm/llvm-project/pull/106560


More information about the llvm-commits mailing list