[llvm] [LV][EVL] Support interleaved access with tail folding by EVL (PR #152070)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 04:20:48 PDT 2025
================
@@ -2386,90 +2390,186 @@ class LLVM_ABI_FOR_TEST VPInterleaveRecipe : public VPRecipeBase {
/// unused gaps can be loaded speculatively.
bool NeedsMaskForGaps = false;
-public:
- VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
- ArrayRef<VPValue *> StoredValues, VPValue *Mask,
- bool NeedsMaskForGaps, DebugLoc DL)
- : VPRecipeBase(VPDef::VPInterleaveSC, {Addr},
- DL),
-
- IG(IG), NeedsMaskForGaps(NeedsMaskForGaps) {
+ VPInterleaveBase(const unsigned char SC,
+ const InterleaveGroup<Instruction> *IG,
+ ArrayRef<VPValue *> Operands,
+ ArrayRef<VPValue *> StoredValues, VPValue *Mask,
+ bool NeedsMaskForGaps, DebugLoc DL)
+ : VPRecipeBase(SC, Operands, DL), IG(IG),
+ NeedsMaskForGaps(NeedsMaskForGaps) {
// TODO: extend the masked interleaved-group support to reversed access.
assert((!Mask || !IG->isReverse()) &&
"Reversed masked interleave-group not supported.");
- for (unsigned i = 0; i < IG->getFactor(); ++i)
- if (Instruction *I = IG->getMember(i)) {
- if (I->getType()->isVoidTy())
+ for (unsigned I = 0; I < IG->getFactor(); ++I)
+ if (Instruction *Inst = IG->getMember(I)) {
+ if (Inst->getType()->isVoidTy())
continue;
- new VPValue(I, this);
+ new VPValue(Inst, this);
}
for (auto *SV : StoredValues)
addOperand(SV);
+
if (Mask) {
HasMask = true;
addOperand(Mask);
}
}
- ~VPInterleaveRecipe() override = default;
- VPInterleaveRecipe *clone() override {
- return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
- NeedsMaskForGaps, getDebugLoc());
+public:
+ VPInterleaveBase *clone() override {
+ llvm_unreachable("cloning not supported");
}
- VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
+ R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Return the address accessed by this recipe.
VPValue *getAddr() const {
return getOperand(0); // Address is the 1st, mandatory operand.
}
+ /// Return true if the access needs a mask because of the gaps.
+ bool needsMaskForGaps() const { return NeedsMaskForGaps; }
+
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
VPValue *getMask() const {
- // Mask is optional and therefore the last, currently 2nd operand.
+ // Mask is optional and the last operand.
return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
}
+ const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
+
+ Instruction *getInsertPos() const { return IG->getInsertPos(); }
+
+ void execute(VPTransformState &State) override {
+ llvm_unreachable("VPInterleaveBase should not be instantiated.");
+ }
+
+ /// Return the cost of this VPInterleaveRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ virtual bool onlyFirstLaneUsed(const VPValue *Op) const = 0;
+
+ /// Returns the number of stored operands of this interleave group. Returns 0
+ /// for load interleave groups.
+ virtual unsigned getNumStoreOperands() const = 0;
+
/// Return the VPValues stored by this interleave group. If it is a load
/// interleave group, return an empty ArrayRef.
- ArrayRef<VPValue *> getStoredValues() const {
- // The first operand is the address, followed by the stored values, followed
- // by an optional mask.
- return ArrayRef<VPValue *>(op_begin(), getNumOperands())
- .slice(1, getNumStoreOperands());
+ virtual ArrayRef<VPValue *> getStoredValues() const = 0;
+};
+
+/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
+/// or stores into one wide load/store and shuffles. The first operand of a
+/// VPInterleave recipe is the address, followed by the stored values, followed
+/// by an optional mask.
+class LLVM_ABI_FOR_TEST VPInterleaveRecipe final : public VPInterleaveBase {
+public:
+ VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
+ ArrayRef<VPValue *> StoredValues, VPValue *Mask,
+ bool NeedsMaskForGaps, DebugLoc DL)
+ : VPInterleaveBase(VPDef::VPInterleaveSC, IG, ArrayRef<VPValue *>({Addr}),
+ StoredValues, Mask, NeedsMaskForGaps, DL) {}
+
+ ~VPInterleaveRecipe() override = default;
+
+ VPInterleaveRecipe *clone() override {
+ return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
+ NeedsMaskForGaps, getDebugLoc());
}
+ VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
+
/// Generate the wide load or store, and shuffles.
void execute(VPTransformState &State) override;
- /// Return the cost of this VPInterleaveRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override;
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
- const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
+ }
- /// Returns the number of stored operands of this interleave group. Returns 0
- /// for load interleave groups.
- unsigned getNumStoreOperands() const {
+ unsigned getNumStoreOperands() const override {
return getNumOperands() - (HasMask ? 2 : 1);
}
- /// The recipe only uses the first lane of the address.
+ ArrayRef<VPValue *> getStoredValues() const override {
+ // The first operand is the address, followed by the stored values, followed
+ // by an optional mask.
+ return ArrayRef<VPValue *>(op_begin(), getNumOperands())
+ .slice(1, getNumStoreOperands());
+ }
+};
+
+/// A recipe for interleaved access operations with vector-predication
+/// intrinsics. The first operand is the address, the second operand is the
+/// explicit vector length. Stored values and mask are optional operands.
+class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
+public:
+ VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask,
+ DebugLoc DL = {})
+ : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
+ ArrayRef<VPValue *>({R.getAddr(), &EVL}),
+ R.getStoredValues(), Mask, R.needsMaskForGaps(), DL) {
+ assert(!IG->isReverse() &&
+ "Reversed interleave-group with tail folding is not supported.");
+ }
+
+ ~VPInterleaveEVLRecipe() override = default;
+
+ VPInterleaveEVLRecipe *clone() override {
+ llvm_unreachable("cloning not implemented yet");
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
+
+ /// The VPValue of the explicit vector length.
+ VPValue *getEVL() const { return getOperand(1); }
+
+ /// Generate the wide load or store, and shuffles.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// The recipe only uses the first lane of the address, and EVL operand.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
- return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
+ return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op) ||
+ Op == getEVL();
}
- Instruction *getInsertPos() const { return IG->getInsertPos(); }
+ unsigned getNumStoreOperands() const override {
+ return getNumOperands() - (HasMask ? 3 : 2);
+ }
+
+ ArrayRef<VPValue *> getStoredValues() const override {
+ // The first operand is the address, and the second operand is EVL, followed
+ // by the stored values, followed by an optional mask.
+ return ArrayRef<VPValue *>(op_begin(), getNumOperands())
+ .slice(2, getNumStoreOperands());
+ }
----------------
lukel97 wrote:
Can this be moved to VPInterleaveBase if we compute it as `iterator_range(op_end() - getNumStoreOperands(), op_end())`?
https://github.com/llvm/llvm-project/pull/152070
More information about the llvm-commits mailing list