[llvm] [VPlan] Implement interleaving as VPlan-to-VPlan transform. (PR #95842)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 14 13:03:38 PDT 2024
================
@@ -1572,3 +1572,421 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
}
}
+
+/// Return the VPValue that stands for \p V in interleaved part \p IC.
+///
+/// Part 0 is always the original value and live-ins are returned unchanged for
+/// every part, so \p V itself is returned in both cases. For any other part
+/// the value is looked up in \p InterleavedValues, where element IC - 1 of the
+/// vector stored for \p V holds the value for part \p IC.
+static VPValue *getInterleavedValue(
+    DenseMap<VPValue *, SmallVector<VPValue *>> &InterleavedValues, VPValue *V,
+    unsigned IC) {
+  if (IC == 0 || V->isLiveIn())
+    return V;
+  // Use find() rather than operator[]: a missing key must not silently insert
+  // an empty vector (which may also grow the map underneath a caller that
+  // still holds an iterator into it) and then be indexed out of bounds.
+  auto It = InterleavedValues.find(V);
+  assert(It != InterleavedValues.end() && It->second.size() >= IC &&
+         "no interleaved value recorded for the requested part");
+  return It->second[IC - 1];
+}
+
+/// Interleave the replicate region \p VPR by cloning it once for each of the
+/// parts 1 .. IC - 1.
+///
+/// Each clone is linked in directly after the previously inserted one (the
+/// first after \p VPR). The basic blocks of the clone and of \p VPR are then
+/// walked in lock-step and, for every pair of recipes:
+///  - all operands of the cloned recipe are replaced by the values recorded
+///    for the current part,
+///  - a cloned VPScalarIVStepsRecipe receives an extra live-in constant
+///    operand holding the part number,
+///  - the values defined by the clone are appended to \p InterleavedValues
+///    under the corresponding values defined by the original recipe.
+static void interleaveReplicateRegion(
+    VPRegionBlock *VPR, VPlan &Plan, unsigned IC,
+    DenseMap<VPValue *, SmallVector<VPValue *>> &InterleavedValues) {
+  using ShallowRPOT =
+      ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>;
+
+  Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+  VPBlockBase *LastInserted = VPR;
+  for (unsigned Part = 1; Part != IC; ++Part) {
+    auto *Clone = VPR->clone();
+    VPBlockUtils::insertBlockAfter(Clone, LastInserted);
+    LastInserted = Clone;
+
+    ShallowRPOT CloneOrder(Clone->getEntry());
+    ShallowRPOT OrigOrder(VPR->getEntry());
+    for (const auto &[NewVPBB, OldVPBB] :
+         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(CloneOrder),
+             VPBlockUtils::blocksOnly<VPBasicBlock>(OrigOrder))) {
+      // Stop as soon as the walk reaches a block that is not directly owned
+      // by the clone.
+      if (NewVPBB->getParent() != Clone)
+        break;
+
+      for (const auto &[CloneR, SrcR] : zip(*NewVPBB, *OldVPBB)) {
+        // Point every operand of the cloned recipe at this part's value.
+        for (unsigned OpIdx = 0, NumOps = CloneR.getNumOperands();
+             OpIdx != NumOps; ++OpIdx) {
+          VPValue *PartOp = getInterleavedValue(
+              InterleavedValues, CloneR.getOperand(OpIdx), Part);
+          CloneR.setOperand(OpIdx, PartOp);
+        }
+
+        // Scalar IV steps take the part number as an additional operand.
+        if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&CloneR))
+          Steps->addOperand(
+              Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)));
+
+        // Record the clone's results as this part's values of the originals.
+        unsigned DefIdx = 0;
+        for (VPValue *Def : SrcR.definedValues())
+          InterleavedValues[Def].push_back(CloneR.getVPValue(DefIdx++));
+      }
+    }
+  }
+}
+
+/// Interleave the header phi recipe \p R for \p IC parts.
+///
+///  - First-order recurrence phis are left untouched.
+///  - A VPWidenIntOrFpInductionRecipe is not cloned. A per-part step is built
+///    in the block preceding the vector loop region: VF, cast to the IV type
+///    if needed, and multiplied by the IV's operand 1 unless that operand is
+///    the constant 1. The step is appended to \p R as an operand. The values
+///    for parts 1 .. IC - 1 are a chain of adds ("step.add", "step.add.N")
+///    inserted at \p InsertPtForPhi, which is advanced past each new add. The
+///    value of the last part is appended to \p R as a further operand.
+///  - Ordered reduction phis get no per-part copies and no entry in
+///    \p InterleavedValues here.
+///  - Every other header phi is cloned IC - 1 times; the clones are recorded
+///    in \p InterleavedValues. Pointer inductions receive extra operands on
+///    the original and on each clone. All remaining clones (reduction phis
+///    additionally get the part number as operand) are collected as one new
+///    group in \p PhisToRemap.
+///
+/// Recipes created in the preheader and the step adds are put into \p ToSkip.
+static void interleaveHeaderPHI(
+    VPRecipeBase &R, VPlan &Plan, unsigned IC,
+    VPBasicBlock::iterator &InsertPtForPhi,
+    DenseMap<VPValue *, SmallVector<VPValue *>> &InterleavedValues,
+    VPTypeAnalysis &TypeInfo, SmallPtrSet<VPRecipeBase *, 8> &ToSkip,
+    SmallVector<SmallVector<VPHeaderPHIRecipe *>> &PhisToRemap) {
+  if (isa<VPFirstOrderRecurrencePHIRecipe>(&R))
+    return;
+
+  // Generate step vectors for each unrolled part.
+  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+    VPBasicBlock *PH =
+        cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+    VPValue *Step = &Plan.getVF();
+    Type *IVTy = TypeInfo.inferScalarType(IV);
+    auto &ID = IV->getInductionDescriptor();
+    FastMathFlags FMFs;
+    if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
+      FMFs = ID.getInductionBinOp()->getFastMathFlags();
+
+    // Bring VF to the scalar type of the induction.
+    if (TypeInfo.inferScalarType(Step) != IVTy) {
+      Instruction::CastOps CastOp;
+      if (IVTy->isFloatingPointTy())
+        CastOp = Instruction::UIToFP;
+      else
+        CastOp = Instruction::Trunc;
+      Step = new VPWidenCastRecipe(CastOp, Step, IV->getScalarType());
+      PH->appendRecipe(Step->getDefiningRecipe());
+      ToSkip.insert(Step->getDefiningRecipe());
+    }
+
+    // Scale VF by the induction's step operand unless it is the constant 1.
+    // isOne() is used instead of comparing getZExtValue() so that constants
+    // wider than 64 bits are handled correctly.
+    auto *ConstScale =
+        IV->getOperand(1)->isLiveIn()
+            ? dyn_cast<ConstantInt>(IV->getOperand(1)->getLiveInIRValue())
+            : nullptr;
+    if (!ConstScale || !ConstScale->isOne()) {
+      VPValue *Scale = IV->getOperand(1);
+      if (TypeInfo.inferScalarType(Scale) != IVTy) {
+        Scale = new VPWidenCastRecipe(Instruction::Trunc, Scale,
+                                      IV->getScalarType());
+        PH->appendRecipe(Scale->getDefiningRecipe());
+        ToSkip.insert(Scale->getDefiningRecipe());
+      }
+
+      VPBuilder Builder(PH);
+      VPInstruction *Mul;
+      if (IVTy->isFloatingPointTy())
+        Mul = Builder.createFPOp(Instruction::FMul, {Step, Scale},
+                                 R.getDebugLoc(), "", FMFs);
+      else
+        Mul = Builder.createNaryOp(Instruction::Mul, {Step, Scale},
+                                   R.getDebugLoc());
+      Step = Mul;
+      ToSkip.insert(Mul);
+    }
+    R.addOperand(Step);
+
+    // Part I is the value of part I - 1 advanced by one step.
+    for (unsigned I = 1; I != IC; ++I) {
+      VPBuilder Builder;
+      Builder.setInsertPoint(R.getParent(), InsertPtForPhi);
+      auto Ins = InterleavedValues.insert({IV, {}});
+      VPValue *Prev = getInterleavedValue(InterleavedValues, IV, I - 1);
+      VPInstruction *Add;
+      std::string Name = I > 1 ? "step.add." + std::to_string(I) : "step.add";
+
+      if (IVTy->isFloatingPointTy())
+        Add = Builder.createFPOp(ID.getInductionOpcode(), {Prev, Step},
+                                 R.getDebugLoc(), Name, FMFs);
+      else
+        Add = Builder.createNaryOp(Instruction::Add, {Prev, Step},
+                                   R.getDebugLoc(), Name);
+      ToSkip.insert(Add);
+      Ins.first->second.push_back(Add);
+      InsertPtForPhi = std::next(Add->getIterator());
+    }
+    R.addOperand(getInterleavedValue(InterleavedValues, IV, IC - 1));
+    return;
+  }
+
+  // Ordered reductions get no per-part phi. Bail out before cloning: creating
+  // a copy, recording it in InterleavedValues and then erasing it again would
+  // leave a dangling VPValue pointer behind in the map.
+  if (auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(&R))
+    if (RdxPhi->isOrdered())
+      return;
+
+  VPRecipeBase *InsertPt = &R;
+  Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+  for (unsigned I = 1; I != IC; ++I) {
+    VPRecipeBase *Copy = R.clone();
+    Copy->insertAfter(InsertPt);
+    InsertPt = Copy;
+
+    // The values defined by the copy are the part-I values of R's results.
+    unsigned Idx = 0;
+    for (VPValue *Res : R.definedValues()) {
+      auto Ins = InterleavedValues.insert({Res, {}});
+      Ins.first->second.push_back(Copy->getVPValue(Idx));
+      Idx++;
+    }
+
+    if (isa<VPWidenPointerInductionRecipe>(&R)) {
+      // The original gets the interleave count once; each copy gets the
+      // interleave count, the original's value and its own part number.
+      if (I == 1)
+        R.addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, IC)));
+      Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, IC)));
+      Copy->addOperand(R.getVPSingleValue());
+      Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, I)));
+      continue;
+    }
+
+    // Copies of reduction phis carry their part number as an extra operand.
+    if (isa<VPReductionPHIRecipe>(&R))
+      Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, I)));
+
+    // All copies of one phi form a single group, opened by the first copy.
+    if (I == 1)
+      PhisToRemap.emplace_back();
+    PhisToRemap.back().push_back(cast<VPHeaderPHIRecipe>(Copy));
+  }
+}
+
+/// Interleave the non-header-phi recipe \p R for \p IC parts.
+///
+/// Several kinds of recipes are not cloned; their single result is recorded in
+/// \p InterleavedValues as the value of every part 1 .. IC - 1:
+///  - ComputeReductionResult, after the values of operand 1 for all further
+///    parts have been appended as operands,
+///  - ExtractFromEnd; unless the plan has a scalar VF only, operand 0 is set
+///    to the value of the last part and processing stops, otherwise
+///    processing falls through to the checks below,
+///  - CalculateTripCountMinusVF, which gets the interleave count as operand,
+///  - the experimental_noalias_scope_decl intrinsic,
+///  - a VPWidenCastRecipe whose operand is a live-in,
+///  - VPInstructions of which only the first part is used.
+/// BranchOnCount / BranchOnCond are left untouched, and a replicated store
+/// whose operand 1 is defined outside the vector regions only has operand 0
+/// redirected to the value of the last part.
+///
+/// Every other recipe is cloned IC - 1 times behind \p R; the clones' results
+/// are recorded in \p InterleavedValues and their operands are remapped to the
+/// values of the respective part.
+///
+/// \p TypeInfo is currently not used by this function.
+static void
+interleaveRecipe(VPRecipeBase &R, VPlan &Plan, unsigned IC,
+                 DenseMap<VPValue *, SmallVector<VPValue *>> &InterleavedValues,
+                 VPTypeAnalysis &TypeInfo) {
+  using namespace llvm::VPlanPatternMatch;
+
+  // Record Def itself as the value of each part 1 .. IC - 1. Registering all
+  // parts (not just one) keeps lookups for any part in bounds when IC > 2.
+  auto MapAllPartsTo = [&InterleavedValues, IC](VPValue *Def) {
+    SmallVector<VPValue *> &Parts =
+        InterleavedValues.insert({Def, {}}).first->second;
+    for (unsigned I = 1; I != IC; ++I)
+      Parts.push_back(Def);
+  };
+
+  VPValue *Op1;
+  if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
+                    m_VPValue(), m_VPValue(Op1)))) {
+    // Look up the per-part operands first so that no iterator into the map is
+    // held across the lookups.
+    for (unsigned I = 1; I != IC; ++I)
+      R.addOperand(getInterleavedValue(InterleavedValues, Op1, I));
+    MapAllPartsTo(R.getVPSingleValue());
+    return;
+  }
+
+  VPValue *Op0;
+  if (match(&R, m_VPInstruction<VPInstruction::ExtractFromEnd>(m_VPValue(Op0),
+                                                               m_VPValue()))) {
+    MapAllPartsTo(R.getVPSingleValue());
+    if (!Plan.hasScalarVFOnly()) {
+      R.setOperand(0, getInterleavedValue(InterleavedValues, Op0, IC - 1));
+      return;
+    }
+    // With a scalar VF only, fall through to the handling below.
+  }
+
+  Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+  if (isa<VPInstruction>(&R) && cast<VPInstruction>(&R)->getOpcode() ==
+                                    VPInstruction::CalculateTripCountMinusVF) {
+    R.addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, IC)));
+    MapAllPartsTo(R.getVPSingleValue());
+    return;
+  }
+
+  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
+    if (VPI->getOpcode() == VPInstruction::BranchOnCount ||
+        VPI->getOpcode() == VPInstruction::BranchOnCond)
+      return;
+  }
+
+  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
+        RepR->getOperand(1)->isDefinedOutsideVectorRegions()) {
+      // Keep a single store and redirect operand 0 to the last part's value.
+      R.setOperand(
+          0, getInterleavedValue(InterleavedValues, R.getOperand(0), IC - 1));
+      return;
+    }
+    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
+      if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
+        MapAllPartsTo(RepR);
+        return;
+      }
+    }
+  }
+
+  // TODO: Generalize for any uniform recipe.
+  if (auto *Cast = dyn_cast<VPWidenCastRecipe>(&R)) {
+    if (Cast->getOperand(0)->isLiveIn()) {
+      MapAllPartsTo(Cast);
+      return;
+    }
+  }
+
+  if (isa<VPInstruction>(&R) &&
+      vputils::onlyFirstPartUsed(R.getVPSingleValue())) {
+    MapAllPartsTo(R.getVPSingleValue());
+    return;
+  }
+
+  // General case: one clone per additional part, inserted behind R in order.
+  VPRecipeBase *InsertPt = &R;
+  for (unsigned I = 1; I != IC; ++I) {
+    VPRecipeBase *Copy = R.clone();
+    Copy->insertAfter(InsertPt);
+    InsertPt = Copy;
+
+    // The values defined by the copy are the part-I values of R's results.
+    unsigned Idx = 0;
+    for (VPValue *Res : R.definedValues()) {
+      auto Ins = InterleavedValues.insert({Res, {}});
+      Ins.first->second.push_back(Copy->getVPValue(Idx));
+      Idx++;
+    }
+
+    if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
+      if (VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart) {
+        Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, I)));
+      }
+      if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) {
+        // The splice for part I combines R's operand 1 of the previous part
+        // with R's operand 1 of this part; no further remapping is done.
+        Copy->setOperand(
+            0, getInterleavedValue(InterleavedValues, R.getOperand(1), I - 1));
+        Copy->setOperand(
+            1, getInterleavedValue(InterleavedValues, R.getOperand(1), I));
+        continue;
+      }
+    }
+    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
+      auto *Phi = cast<VPReductionPHIRecipe>(R.getOperand(0));
+      if (Phi->isOrdered()) {
+        // For ordered reductions the entries recorded for the phi are the
+        // reduction results: the first copy resets the list to start with Red,
+        // every copy appends its own result and becomes the phi's operand 1.
+        auto Ins = InterleavedValues.insert({Phi, {}});
+        if (I == 1) {
+          Ins.first->second.clear();
+          Ins.first->second.push_back(Red);
+        }
+        Ins.first->second.push_back(Copy->getVPSingleValue());
+        Phi->setOperand(1, Copy->getVPSingleValue());
+      }
+    }
+    for (unsigned Idx = 0; Idx != Copy->getNumOperands(); ++Idx)
+      Copy->setOperand(Idx, getInterleavedValue(InterleavedValues,
+                                                Copy->getOperand(Idx), I));
+
+    // Add operand indicating the part to generate code for to recipes still
+    // requiring it.
+    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
+            VPVectorPointerRecipe>(Copy))
+      Copy->addOperand(Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, I)));
+
+    // Vector pointer copies keep R's own operand 0 rather than a per-part one.
+    if (isa<VPVectorPointerRecipe>(R))
+      Copy->setOperand(0, R.getOperand(0));
+  }
+}
+
+static void
+interleaveBlock(VPBlockBase *VPB, VPlan &Plan, unsigned IC,
+ DenseMap<VPValue *, SmallVector<VPValue *>> &InterleavedValues,
+ VPTypeAnalysis &TypeInfo,
+ SmallPtrSet<VPRecipeBase *, 8> &ToSkip,
+ SmallVector<SmallVector<VPHeaderPHIRecipe *>> &PhisToRemap) {
+ auto *VPR = dyn_cast<VPRegionBlock>(VPB);
+ if (VPR) {
+ if (VPR->isReplicator())
+ interleaveReplicateRegion(VPR, Plan, IC, InterleavedValues);
+ else {
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
+ RPOT(VPR->getEntry());
+ for (VPBlockBase *VPB : RPOT) {
+ interleaveBlock(VPB, Plan, IC, InterleavedValues, TypeInfo, ToSkip,
+ PhisToRemap);
+ }
+ }
+ return;
+ }
+
+ auto *VPBB = cast<VPBasicBlock>(VPB);
+ auto InsertPtForPhi = VPBB->getFirstNonPhi();
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ if (ToSkip.contains(&R))
+ continue;
+
+ auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
+ if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
+ for (unsigned I = 1; I != IC; ++I) {
+ auto Ins = InterleavedValues.insert({SingleDef, {}});
+ Ins.first->second.push_back(SingleDef);
+ }
+ continue;
+ }
+
+ if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
+ interleaveHeaderPHI(R, Plan, IC, InsertPtForPhi, InterleavedValues,
+ TypeInfo, ToSkip, PhisToRemap);
+ continue;
+ }
+
+ interleaveRecipe(R, Plan, IC, InterleavedValues, TypeInfo);
----------------
fhahn wrote:
Done, thanks!
https://github.com/llvm/llvm-project/pull/95842
More information about the llvm-commits mailing list