[llvm] [VPlan] Implement interleaving as VPlan-to-VPlan transform. (PR #95842)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 5 09:05:07 PDT 2024


================
@@ -1622,3 +1622,398 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
     }
   }
 }
+
+namespace {
+/// Bookkeeping used while interleaving: for every value defined in the
+/// original (part 0) code it remembers the values that stand in for it in the
+/// additional parts 1, 2, ....
+class InterleaveState {
+  /// Maps a part-0 value to its per-part replacements. The entry at index K
+  /// is the replacement used for part K + 1.
+  DenseMap<VPValue *, SmallVector<VPValue *>> InterleavedValues;
+
+public:
+  /// Return the value to use in place of \p V for part \p IC. Part 0 and
+  /// live-in values resolve to \p V itself; any other request must have been
+  /// registered before.
+  VPValue *getInterleavedValue(VPValue *V, unsigned IC) {
+    const bool IsPartZero = IC == 0;
+    if (IsPartZero || V->isLiveIn())
+      return V;
+    assert(
+        (InterleavedValues.contains(V) && InterleavedValues[V].size() >= IC) &&
+        "accessed value does not exist");
+    SmallVector<VPValue *> &Parts = InterleavedValues[V];
+    return Parts[IC - 1];
+  }
+
+  /// Record that, for part \p I, each value defined by \p OrigR is replaced
+  /// by the value at the same position in \p CopyR. Parts have to be
+  /// registered in increasing order, starting at 1.
+  void addInterleavedValues(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
+                            unsigned I) {
+    for (const auto &[DefIdx, OrigDef] : enumerate(OrigR->definedValues())) {
+      auto Ins = InterleavedValues.insert({OrigDef, {}});
+      assert(Ins.first->second.size() == I - 1 && "earlier parts not set");
+      VPValue *CopyDef = CopyR->getVPValue(DefIdx);
+      Ins.first->second.push_back(CopyDef);
+    }
+  }
+
+  /// Register \p R as its own replacement for each of the parts 1 .. IC - 1.
+  void addUniform(VPSingleDefRecipe *R, unsigned IC) {
+    SmallVector<VPValue *> &Parts =
+        InterleavedValues.insert({R, {}}).first->second;
+    for (unsigned Part = 1; Part != IC; ++Part)
+      Parts.push_back(R);
+  }
+
+  /// Return true if replacements have been registered for \p VPV.
+  bool contains(VPValue *VPV) { return InterleavedValues.contains(VPV); }
+
+  /// Direct, mutable access to the underlying map.
+  DenseMap<VPValue *, SmallVector<VPValue *>> &getInterleavedValues() {
+    return InterleavedValues;
+  }
+
+  /// Replace operand \p OpIdx of \p R with its counterpart for part \p Part.
+  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
+    VPValue *Remapped = getInterleavedValue(R->getOperand(OpIdx), Part);
+    R->setOperand(OpIdx, Remapped);
+  }
+
+  /// Replace every operand of \p R with its counterpart for part \p Part.
+  void remapOperands(VPRecipeBase *R, unsigned Part) {
+    for (unsigned OpIdx = 0, NumOps = R->getNumOperands(); OpIdx != NumOps;
+         ++OpIdx)
+      remapOperand(R, OpIdx, Part);
+  }
+};
+} // namespace
+
+/// Create IC - 1 clones of the region \p VPR, one per additional part, and
+/// place each of them in front of the single successor of \p VPR. The
+/// operands of every cloned recipe are remapped to the values of the clone's
+/// part, cloned VPScalarIVStepsRecipes receive the part number as an extra
+/// live-in operand, and the cloned recipes are registered in
+/// \p InterleavedValues as the per-part values of their originals.
+static void interleaveReplicateRegion(VPRegionBlock *VPR, VPlan &Plan,
+                                      unsigned IC,
+                                      InterleaveState &InterleavedValues) {
+  Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
+  for (unsigned Part = 1; Part != IC; ++Part) {
+    auto *RegionCopy = VPR->clone();
+    VPBlockUtils::insertBlockBefore(RegionCopy, InsertPt);
+
+    // Walk the clone and the original in lock-step, block by block and recipe
+    // by recipe, so each cloned recipe is paired with the one it was cloned
+    // from.
+    auto CopyBlocks = vp_depth_first_shallow(RegionCopy->getEntry());
+    auto OrigBlocks = vp_depth_first_shallow(VPR->getEntry());
+    for (const auto &[CopyVPBB, OrigVPBB] :
+         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(CopyBlocks),
+             VPBlockUtils::blocksOnly<VPBasicBlock>(OrigBlocks))) {
+      for (const auto &[CopyR, OrigR] : zip(*CopyVPBB, *OrigVPBB)) {
+        InterleavedValues.remapOperands(&CopyR, Part);
+
+        // Scalar IV steps additionally get the part number as an operand.
+        if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&CopyR)) {
+          VPValue *PartVal =
+              Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
+          Steps->addOperand(PartVal);
+        }
+
+        InterleavedValues.addInterleavedValues(&OrigR, &CopyR, Part);
+      }
+    }
+  }
+}
+
+/// Interleave the widened int/FP induction \p IV.
+///
+/// A per-part step is built in the block preceding the enclosing loop region:
+/// it starts out as the plan's VF (cast to the induction type if the types
+/// differ) and is multiplied by operand 1 of \p IV unless that operand is a
+/// live-in constant equal to 1. The step is appended to \p IV as an operand.
+/// Then, for each part 1 .. IC - 1, an add of the previous part's value and
+/// the step is created at \p InsertPtForPhi and registered in
+/// \p InterleavedValues; \p InsertPtForPhi is advanced past each new add.
+/// Finally the value of the last part is appended to \p IV as an operand.
+/// Every recipe created here is inserted into \p ToSkip.
+static void interleaveWidenInduction(VPWidenIntOrFpInductionRecipe *IV,
+                                     VPlan &Plan, unsigned IC,
+                                     VPBasicBlock::iterator &InsertPtForPhi,
+                                     InterleaveState &InterleavedValues,
+                                     VPTypeAnalysis &TypeInfo,
+                                     SmallPtrSet<VPRecipeBase *, 8> &ToSkip) {
+  VPBasicBlock *PH = cast<VPBasicBlock>(
+      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
+  VPValue *Step = &Plan.getVF();
+  Type *IVTy = TypeInfo.inferScalarType(IV);
+  auto &ID = IV->getInductionDescriptor();
+
+  // Carry over the fast-math flags of an FP induction update, if there is one.
+  FastMathFlags FMFs;
+  auto *InductionBinOp = ID.getInductionBinOp();
+  if (InductionBinOp && isa<FPMathOperator>(InductionBinOp))
+    FMFs = InductionBinOp->getFastMathFlags();
+
+  // Create a cast of \p V to the induction's scalar type at the end of PH and
+  // mark it as to be skipped.
+  auto CastInPreheader = [&](Instruction::CastOps Opcode,
+                             VPValue *V) -> VPValue * {
+    VPValue *Cast = new VPWidenCastRecipe(Opcode, V, IV->getScalarType());
+    PH->appendRecipe(Cast->getDefiningRecipe());
+    ToSkip.insert(Cast->getDefiningRecipe());
+    return Cast;
+  };
+
+  if (TypeInfo.inferScalarType(Step) != IVTy) {
+    Instruction::CastOps CastOp = Instruction::Trunc;
+    if (IVTy->isFloatingPointTy())
+      CastOp = Instruction::UIToFP;
+    Step = CastInPreheader(CastOp, Step);
+  }
+
+  // Multiplying by the scale is only needed if it is not known to be 1.
+  VPValue *ScaleOp = IV->getOperand(1);
+  ConstantInt *ConstScale = nullptr;
+  if (ScaleOp->isLiveIn())
+    ConstScale = dyn_cast<ConstantInt>(ScaleOp->getLiveInIRValue());
+  const bool ScaleIsOne = ConstScale && ConstScale->getZExtValue() == 1;
+  if (!ScaleIsOne) {
+    VPValue *Scale = ScaleOp;
+    if (TypeInfo.inferScalarType(Scale) != IVTy)
+      Scale = CastInPreheader(Instruction::Trunc, Scale);
+
+    VPBuilder Builder(PH);
+    VPInstruction *Mul =
+        IVTy->isFloatingPointTy()
+            ? Builder.createFPOp(Instruction::FMul, {Step, Scale},
+                                 IV->getDebugLoc(), "", FMFs)
+            : Builder.createNaryOp(Instruction::Mul, {Step, Scale},
+                                   IV->getDebugLoc());
+    Step = Mul;
+    ToSkip.insert(Mul);
+  }
+  IV->addOperand(Step);
+
+  // Part N is the value of part N - 1 advanced by one step.
+  for (unsigned Part = 1; Part != IC; ++Part) {
+    std::string Name;
+    if (Part > 1)
+      Name = "step.add." + std::to_string(Part);
+    else
+      Name = "step.add";
+
+    VPBuilder Builder;
+    Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
+    VPValue *Prev = InterleavedValues.getInterleavedValue(IV, Part - 1);
+
+    VPInstruction *Add = nullptr;
+    if (IVTy->isFloatingPointTy())
+      Add = Builder.createFPOp(ID.getInductionOpcode(), {Prev, Step},
+                               IV->getDebugLoc(), Name, FMFs);
+    else
+      Add = Builder.createNaryOp(Instruction::Add, {Prev, Step},
+                                 IV->getDebugLoc(), Name);
+
+    ToSkip.insert(Add);
+    InterleavedValues.addInterleavedValues(IV, Add, Part);
+    InsertPtForPhi = std::next(Add->getIterator());
+  }
+
+  // The induction also takes the value of the last part as an operand.
+  IV->addOperand(InterleavedValues.getInterleavedValue(IV, IC - 1));
+}
+
+/// Interleave the header phi recipe \p R.
+///
+/// First-order recurrence phis are left untouched and widened int/FP
+/// inductions are delegated to interleaveWidenInduction. Any other header phi
+/// is cloned once per additional part; the clones are chained directly after
+/// \p R and registered in \p InterleavedValues. Clones of pointer inductions
+/// and of unordered reduction phis receive extra live-in operands (see below).
+static void interleaveHeaderPHI(VPRecipeBase &R, VPlan &Plan, unsigned IC,
+                                VPBasicBlock::iterator &InsertPtForPhi,
+                                InterleaveState &InterleavedValues,
+                                VPTypeAnalysis &TypeInfo,
+                                SmallPtrSet<VPRecipeBase *, 8> &ToSkip) {
+  // First-order recurrences pass a single vector or scalar through their header
+  // phis, irrespective of interleaving.
+  if (isa<VPFirstOrderRecurrencePHIRecipe>(&R))
+    return;
+
+  // Generate step vectors for each unrolled part.
+  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+    interleaveWidenInduction(IV, Plan, IC, InsertPtForPhi, InterleavedValues,
+                             TypeInfo, ToSkip);
+    return;
+  }
+
+  const bool IsPtrInduction = isa<VPWidenPointerInductionRecipe>(&R);
+  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(&R);
+
+  Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+  VPRecipeBase *LastInserted = &R;
+  for (unsigned Part = 1; Part != IC; ++Part) {
+    VPRecipeBase *Copy = R.clone();
+    Copy->insertAfter(LastInserted);
+    LastInserted = Copy;
+    InterleavedValues.addInterleavedValues(&R, Copy, Part);
+
+    if (IsPtrInduction) {
+      // The original gets the interleave count appended once; every copy gets
+      // the interleave count, the original's value and its own part number.
+      VPValue *ICVal = Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, IC));
+      if (Part == 1)
+        R.addOperand(ICVal);
+      Copy->addOperand(ICVal);
+      Copy->addOperand(R.getVPSingleValue());
+      Copy->addOperand(
+          Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)));
+    } else if (RdxPhi) {
+      if (RdxPhi->isOrdered()) {
+        // Ordered reductions keep only the original phi, so drop the copy and
+        // stop.
+        // NOTE(review): the copy erased here was already registered in
+        // InterleavedValues above, so the map keeps referring to it - confirm
+        // that this entry is replaced before anything reads it.
+        Copy->eraseFromParent();
+        break;
+      }
+      // Copies of unordered reduction phis get their part number appended.
+      Copy->addOperand(
+          Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)));
+    }
+  }
+}
+
+/// Handle non-uniform, non-header-phi recipes.
+static void interleaveRecipe(VPRecipeBase &R, VPlan &Plan, unsigned IC,
+                             InterleaveState &InterleavedValues,
+                             VPTypeAnalysis &TypeInfo) {
+  using namespace llvm::VPlanPatternMatch;
+  if (match(&R, m_BranchOnCond(m_VPValue())) ||
+      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+    return;
+
+  VPValue *Op0;
+  if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(m_VPValue(Op0),
+                                                               m_VPValue()))) {
+    InterleavedValues.addUniform(cast<VPInstruction>(&R), IC);
+    if (Plan.hasScalarVFOnly()) {
+      unsigned Offset = cast<ConstantInt>(R.getOperand(1)->getLiveInIRValue())
+                            ->getZExtValue();
+      R.getVPSingleValue()->replaceAllUsesWith(
+          InterleavedValues.getInterleavedValue(Op0, IC - Offset));
+    } else {
+      InterleavedValues.remapOperands(&R, IC - 1);
----------------
fhahn wrote:

Done, thanks!

https://github.com/llvm/llvm-project/pull/95842


More information about the llvm-commits mailing list