[llvm] [VPlan] Implement interleaving as VPlan-to-VPlan transform. (PR #95842)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 15:24:32 PDT 2024
================
@@ -1622,3 +1622,398 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
}
}
+
+namespace {
+/// Tracks, for every VPValue of the original plan, the values that stand in
+/// for it in the additional interleaved parts. The original value itself
+/// serves as part 0; the vector stored for a value holds the values for parts
+/// 1, 2, ... at indices 0, 1, ...
+class InterleaveState {
+  DenseMap<VPValue *, SmallVector<VPValue *>> InterleavedValues;
+
+public:
+  /// Return the value representing \p V in part \p IC (used as a part index
+  /// here). Part 0 and live-ins always map to \p V itself; any other request
+  /// must refer to a part that has already been registered.
+  VPValue *getInterleavedValue(VPValue *V, unsigned IC) {
+    if (IC == 0 || V->isLiveIn())
+      return V;
+    // Look the entry up once. Unlike operator[], find() never inserts an
+    // empty entry for a value that has not been registered.
+    auto It = InterleavedValues.find(V);
+    assert((It != InterleavedValues.end() && It->second.size() >= IC) &&
+           "accessed value does not exist");
+    return It->second[IC - 1];
+  }
+
+  /// Record that, for part \p I, each value defined by \p OrigR is
+  /// represented by the value at the same index defined by \p CopyR. Parts
+  /// must be added in increasing order, starting at 1.
+  void addInterleavedValues(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
+                            unsigned I) {
+    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
+      SmallVector<VPValue *> &Parts =
+          InterleavedValues.insert({VPV, {}}).first->second;
+      assert(Parts.size() == I - 1 && "earlier parts not set");
+      Parts.push_back(CopyR->getVPValue(Idx));
+    }
+  }
+
+  /// Register \p R as its own representative for parts 1 .. \p IC - 1.
+  void addUniform(VPSingleDefRecipe *R, unsigned IC) {
+    SmallVector<VPValue *> &Parts =
+        InterleavedValues.insert({R, {}}).first->second;
+    for (unsigned I = 1; I != IC; ++I)
+      Parts.push_back(R);
+  }
+
+  /// Return true if values for \p VPV have been registered.
+  bool contains(VPValue *VPV) const { return InterleavedValues.contains(VPV); }
+
+  /// Direct, mutable access to the underlying map.
+  DenseMap<VPValue *, SmallVector<VPValue *>> &getInterleavedValues() {
+    return InterleavedValues;
+  }
+
+  /// Replace operand \p OpIdx of \p R with its representative for \p Part.
+  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
+    VPValue *Op = R->getOperand(OpIdx);
+    R->setOperand(OpIdx, getInterleavedValue(Op, Part));
+  }
+
+  /// Replace every operand of \p R with its representative for \p Part.
+  void remapOperands(VPRecipeBase *R, unsigned Part) {
+    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
+      R->setOperand(OpIdx, getInterleavedValue(Op, Part));
+  }
+};
+} // namespace
+
+/// Clone the region \p VPR once for each part 1 .. \p IC - 1, placing every
+/// clone before the original region's single successor. The recipes of each
+/// clone get their operands remapped to the clone's part, and their defined
+/// values are registered in \p InterleavedValues as that part's values of the
+/// corresponding original recipes. Cloned scalar-IV-steps recipes additionally
+/// receive the part number as an extra constant operand.
+static void interleaveReplicateRegion(VPRegionBlock *VPR, VPlan &Plan,
+                                      unsigned IC,
+                                      InterleaveState &InterleavedValues) {
+  VPBlockBase *Successor = VPR->getSingleSuccessor();
+  Type *PartIdxTy = Plan.getCanonicalIV()->getScalarType();
+
+  unsigned Part = 1;
+  while (Part != IC) {
+    auto *RegionCopy = VPR->clone();
+    VPBlockUtils::insertBlockBefore(RegionCopy, Successor);
+
+    // Walk the blocks of the clone and of the original region in lock-step.
+    auto CopyBlocks = vp_depth_first_shallow(RegionCopy->getEntry());
+    auto OrigBlocks = vp_depth_first_shallow(VPR->getEntry());
+    for (const auto &[CopyVPBB, OrigVPBB] :
+         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(CopyBlocks),
+             VPBlockUtils::blocksOnly<VPBasicBlock>(OrigBlocks))) {
+      // Likewise pair up each cloned recipe with the recipe it was cloned
+      // from.
+      for (const auto &[CopyR, OrigR] : zip(*CopyVPBB, *OrigVPBB)) {
+        InterleavedValues.remapOperands(&CopyR, Part);
+
+        auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&CopyR);
+        if (Steps)
+          Steps->addOperand(
+              Plan.getOrAddLiveIn(ConstantInt::get(PartIdxTy, Part)));
+
+        InterleavedValues.addInterleavedValues(&OrigR, &CopyR, Part);
+      }
+    }
+    ++Part;
+  }
+}
+
+/// Create the values of the widened induction \p IV for parts 1 .. \p IC - 1.
+///
+/// The per-part step starts out as the plan's VF, cast to the induction's
+/// scalar type if the types differ, and is multiplied by the induction's step
+/// (operand 1 of \p IV) unless that step is the constant 1. These helper
+/// recipes are appended to the single predecessor of the enclosing loop
+/// region. The value of part I is an add (or the induction's own opcode for
+/// floating-point inductions) of part I - 1 and the per-part step, created at
+/// \p InsertPtForPhi, which is advanced past each newly created recipe.
+///
+/// \p IV receives two extra operands: the per-part step and the value of the
+/// last part. Every recipe created here is inserted into \p ToSkip
+/// (presumably so the caller does not interleave them again -- confirm
+/// against the caller).
+static void interleaveWidenInduction(VPWidenIntOrFpInductionRecipe *IV,
+                                     VPlan &Plan, unsigned IC,
+                                     VPBasicBlock::iterator &InsertPtForPhi,
+                                     InterleaveState &InterleavedValues,
+                                     VPTypeAnalysis &TypeInfo,
+                                     SmallPtrSet<VPRecipeBase *, 8> &ToSkip) {
+  VPBasicBlock *PH = cast<VPBasicBlock>(
+      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
+  VPValue *Step = &Plan.getVF();
+  Type *IVTy = TypeInfo.inferScalarType(IV);
+  auto &ID = IV->getInductionDescriptor();
+  FastMathFlags FMFs;
+  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
+    FMFs = ID.getInductionBinOp()->getFastMathFlags();
+
+  // Bring VF to the scalar type of the induction.
+  if (TypeInfo.inferScalarType(Step) != IVTy) {
+    Instruction::CastOps CastOp =
+        IVTy->isFloatingPointTy() ? Instruction::UIToFP : Instruction::Trunc;
+    Step = new VPWidenCastRecipe(CastOp, Step, IV->getScalarType());
+    PH->appendRecipe(Step->getDefiningRecipe());
+    ToSkip.insert(Step->getDefiningRecipe());
+  }
+
+  auto *ConstScale =
+      IV->getOperand(1)->isLiveIn()
+          ? dyn_cast<ConstantInt>(IV->getOperand(1)->getLiveInIRValue())
+          : nullptr;
+  // Scale VF by the induction step unless the step is known to be 1. isOne()
+  // is used rather than getZExtValue() so constants wider than 64 bits (e.g.
+  // a negative i128 step) are handled without asserting.
+  if (!ConstScale || !ConstScale->isOne()) {
+    VPValue *Scale = IV->getOperand(1);
+    if (TypeInfo.inferScalarType(Scale) != IVTy) {
+      Scale =
+          new VPWidenCastRecipe(Instruction::Trunc, Scale, IV->getScalarType());
+      PH->appendRecipe(Scale->getDefiningRecipe());
+      ToSkip.insert(Scale->getDefiningRecipe());
+    }
+
+    VPBuilder Builder(PH);
+    VPInstruction *Mul;
+    if (IVTy->isFloatingPointTy())
+      Mul = Builder.createFPOp(Instruction::FMul, {Step, Scale},
+                               IV->getDebugLoc(), "", FMFs);
+    else
+      Mul = Builder.createNaryOp(Instruction::Mul, {Step, Scale},
+                                 IV->getDebugLoc());
+    Step = Mul;
+    ToSkip.insert(Mul);
+  }
+  IV->addOperand(Step);
+
+  // Chain the parts: part I = part I - 1 + Step.
+  for (unsigned I = 1; I != IC; ++I) {
+    VPBuilder Builder;
+    Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
+    VPValue *Prev = InterleavedValues.getInterleavedValue(IV, I - 1);
+    VPInstruction *Add;
+    std::string Name = I > 1 ? "step.add." + std::to_string(I) : "step.add";
+
+    if (IVTy->isFloatingPointTy())
+      Add = Builder.createFPOp(ID.getInductionOpcode(), {Prev, Step},
+                               IV->getDebugLoc(), Name, FMFs);
+    else
+      Add = Builder.createNaryOp(Instruction::Add, {Prev, Step},
+                                 IV->getDebugLoc(), Name);
+    ToSkip.insert(Add);
+    InterleavedValues.addInterleavedValues(IV, Add, I);
+    // Later recipes are inserted after the add that was just created.
+    InsertPtForPhi = std::next(Add->getIterator());
+  }
+  IV->addOperand(InterleavedValues.getInterleavedValue(IV, IC - 1));
+}
+
+/// Interleave the header phi recipe \p R for parts 1 .. \p IC - 1.
+///
+/// - First-order recurrence phis are left untouched.
+/// - Widened int/FP inductions are delegated to interleaveWidenInduction.
+/// - Ordered reduction phis are not cloned.
+/// - Every other header phi is cloned once per part; the clones are inserted
+///   one after another following \p R and registered in \p InterleavedValues.
+///   For widened pointer inductions, \p R gets \p IC as an extra operand and
+///   each clone gets \p IC, the value defined by \p R and its part number as
+///   extra operands. Clones of (unordered) reduction phis get their part
+///   number as an extra operand.
+static void interleaveHeaderPHI(VPRecipeBase &R, VPlan &Plan, unsigned IC,
+                                VPBasicBlock::iterator &InsertPtForPhi,
+                                InterleaveState &InterleavedValues,
+                                VPTypeAnalysis &TypeInfo,
+                                SmallPtrSet<VPRecipeBase *, 8> &ToSkip) {
+  // First-order recurrences pass a single vector or scalar through their header
+  // phis, irrespective of interleaving.
+  if (isa<VPFirstOrderRecurrencePHIRecipe>(&R))
+    return;
+
+  // Generate step vectors for each unrolled part.
+  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+    interleaveWidenInduction(IV, Plan, IC, InsertPtForPhi, InterleavedValues,
+                             TypeInfo, ToSkip);
+    return;
+  }
+
+  // Ordered reduction phis are not cloned. Bail out before creating a clone,
+  // so that no pointer to a clone that is erased right away gets recorded in
+  // InterleavedValues.
+  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(&R);
+  if (RdxPhi && RdxPhi->isOrdered())
+    return;
+
+  const bool IsPtrInduction = isa<VPWidenPointerInductionRecipe>(&R);
+  VPRecipeBase *InsertPt = &R;
+  Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+  for (unsigned I = 1; I != IC; ++I) {
+    VPRecipeBase *Copy = R.clone();
+    Copy->insertAfter(InsertPt);
+    InsertPt = Copy;
+    InterleavedValues.addInterleavedValues(&R, Copy, I);
+
+    if (IsPtrInduction) {
+      VPValue *ICVal = Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, IC));
+      // The original recipe receives the interleave count only once.
+      if (I == 1)
+        R.addOperand(ICVal);
+      Copy->addOperand(ICVal);
+      Copy->addOperand(R.getVPSingleValue());
+      Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, I)));
+      continue;
+    }
+
+    if (RdxPhi)
+      Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, I)));
+  }
+}
+
+/// Handle non-uniform, non-header-phi recipes.
+static void interleaveRecipe(VPRecipeBase &R, VPlan &Plan, unsigned IC,
+ InterleaveState &InterleavedValues,
+ VPTypeAnalysis &TypeInfo) {
+ using namespace llvm::VPlanPatternMatch;
+ if (match(&R, m_BranchOnCond(m_VPValue())) ||
+ match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+ return;
+
+ VPValue *Op0;
+ if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(m_VPValue(Op0),
+ m_VPValue()))) {
+ InterleavedValues.addUniform(cast<VPInstruction>(&R), IC);
+ if (Plan.hasScalarVFOnly()) {
+ unsigned Offset = cast<ConstantInt>(R.getOperand(1)->getLiveInIRValue())
+ ->getZExtValue();
+ R.getVPSingleValue()->replaceAllUsesWith(
+ InterleavedValues.getInterleavedValue(Op0, IC - Offset));
+ } else {
+ InterleavedValues.remapOperands(&R, IC - 1);
----------------
ayalz wrote:
Worth explaining - extracting from end of (vector of) last part?
https://github.com/llvm/llvm-project/pull/95842
More information about the llvm-commits
mailing list