[llvm] [LV][VPlan] Add initial support for CSA vectorization (PR #106560)

Elvis Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 9 21:47:55 PST 2024


================
@@ -2346,6 +2444,222 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPCSAHeaderPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+                                 VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  printAsOperand(O, SlotTracker);
+  O << " = csa-data-phi ";
+  printOperands(O, SlotTracker);
+}
+#endif
+
+void VPCSAHeaderPHIRecipe::execute(VPTransformState &State) {
+  // PrevBB is this BB
+  IRBuilder<>::InsertPointGuard Guard(State.Builder);
+  State.Builder.SetInsertPoint(State.CFG.PrevBB->getFirstNonPHI());
+
+  Value *InitData = State.get(getVPInitData(), 0);
+  PHINode *DataPhi =
+      State.Builder.CreatePHI(InitData->getType(), 2, "csa.data.phi");
+  BasicBlock *PreheaderBB = State.CFG.getPreheaderBBFor(this);
+  DataPhi->addIncoming(InitData, PreheaderBB);
+  // Note: We didn't add Incoming for the new data since VPCSADataUpdateRecipe
+  // may not have been executed. We let VPCSADataUpdateRecipe::execute add the
+  // incoming operand to DataPhi.
+
+  State.set(this, DataPhi);
+}
+
+InstructionCost VPCSAHeaderPHIRecipe::computeCost(ElementCount VF,
+                                                  VPCostContext &Ctx) const {
+  if (VF.isScalar())
+    return 0;
+
+  InstructionCost C = 0;
+  auto *VTy = VectorType::get(getUnderlyingValue()->getType(), VF);
+  const TargetTransformInfo &TTI = Ctx.TTI;
+
+  // FIXME: These costs should be moved into VPInstruction::computeCost. We put
+  // them here for now since there is no VPInstruction::computeCost support.
+  // CSAInitMask
+  C += TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VTy);
+  // CSAInitData
+  C += TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VTy);
+  return C;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPCSADataUpdateRecipe::print(raw_ostream &O, const Twine &Indent,
+                                  VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  printAsOperand(O, SlotTracker);
+  O << " = csa-data-update ";
+  printOperands(O, SlotTracker);
+}
+#endif
+
+void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
+  Value *AnyActive = State.get(getVPAnyActive(), /*NeedsScalar=*/true);
+  Value *DataUpdate = getVPDataPhi() == getVPTrue() ? State.get(getVPFalse())
+                                                    : State.get(getVPTrue());
+  PHINode *DataPhi = cast<PHINode>(State.get(getVPDataPhi()));
+  Value *DataSel = State.Builder.CreateSelect(AnyActive, DataUpdate, DataPhi,
+                                              "csa.data.sel");
+
+  DataPhi->addIncoming(DataSel, State.CFG.PrevBB);
+
+  State.set(this, DataSel);
+}
+
+InstructionCost VPCSADataUpdateRecipe::computeCost(ElementCount VF,
+                                                   VPCostContext &Ctx) const {
+  if (VF.isScalar())
+    return 0;
+
+  InstructionCost C = 0;
+  auto *VTy = VectorType::get(getUnderlyingValue()->getType(), VF);
----------------
ElvisWang123 wrote:

Using `inferScalarType(this)` instead of `getUnderlyingValue()->getType()` would be better.

https://github.com/llvm/llvm-project/pull/106560


More information about the llvm-commits mailing list