[llvm-branch-commits] [llvm] [VPlan] Implement compressed widening of memory instructions (PR #166956)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Nov 7 07:24:02 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-llvm-analysis
Author: Sergey Kachkov (skachkov-sc)
Changes:
RFC link: https://discourse.llvm.org/t/rfc-loop-vectorization-of-compress-store-expand-load-patterns/86442
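For readers new to the pattern, here is a minimal scalar sketch of the kind of compress-store loop the RFC targets (the function and variable names are illustrative, not taken from the patch): consecutive stores guarded by a loop-varying condition, where the store index advances only for the active iterations; the expand-load pattern is the mirrored read side.

```cpp
// Illustrative compress-store loop: dst is written consecutively, but only
// on iterations where the condition holds, so the store index lags behind
// the loop induction variable.
void compress_positive(const int *src, int *dst, int n) {
  int k = 0;
  for (int i = 0; i < n; ++i)
    if (src[i] > 0)
      dst[k++] = src[i];
}
```

The patch adds a `CM_Compressed` widening decision so that such accesses can be widened into `llvm.masked.compressstore` / `llvm.masked.expandload` calls, as shown in the diff below.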
---
Full diff: https://github.com/llvm/llvm-project/pull/166956.diff
5 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+1)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+17-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+20-12)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+17-6)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+6-5)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0f17312b03827..e8769f5860c77 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1442,6 +1442,7 @@ class TargetTransformInfo {
Normal, ///< The cast is used with a normal load/store.
Masked, ///< The cast is used with a masked load/store.
GatherScatter, ///< The cast is used with a gather/scatter.
+ Compressed, ///< The cast is used with an expand load/compress store.
Interleave, ///< The cast is used with an interleaved load/store.
Reversed, ///< The cast is used with a reversed load/store.
};
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 914018591d832..25e8a63eae9cd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1027,6 +1027,7 @@ class LoopVectorizationCostModel {
CM_Widen_Reverse, // For consecutive accesses with stride -1.
CM_Interleave,
CM_GatherScatter,
+ CM_Compressed,
CM_Scalarize,
CM_VectorCall,
CM_IntrinsicCall
@@ -3108,9 +3109,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
if (IsUniformMemOpUse(I))
return true;
- return (WideningDecision == CM_Widen ||
- WideningDecision == CM_Widen_Reverse ||
- WideningDecision == CM_Interleave);
+ return (
+ WideningDecision == CM_Widen || WideningDecision == CM_Widen_Reverse ||
+ WideningDecision == CM_Interleave || WideningDecision == CM_Compressed);
};
// Returns true if Ptr is the pointer operand of a memory access instruction
@@ -5191,12 +5192,17 @@ InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
Instruction *I, ElementCount VF, InstWidening Decision) {
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
+ const Align Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ if (Decision == CM_Compressed)
+ return TTI.getExpandCompressMemoryOpCost(I->getOpcode(), VectorTy,
+ /*VariableMask*/ true, Alignment,
+ CostKind, I);
+
assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
"Expected widen decision.");
- const Align Alignment = getLoadStoreAlignment(I);
InstructionCost Cost = 0;
if (Legal->isMaskRequired(I)) {
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -6299,6 +6305,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
switch (getWideningDecision(I, VF)) {
case LoopVectorizationCostModel::CM_GatherScatter:
return TTI::CastContextHint::GatherScatter;
+ case LoopVectorizationCostModel::CM_Compressed:
+ return TTI::CastContextHint::Compressed;
case LoopVectorizationCostModel::CM_Interleave:
return TTI::CastContextHint::Interleave;
case LoopVectorizationCostModel::CM_Scalarize:
@@ -7514,8 +7522,9 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, Range.Start);
bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
+ bool Compressed = Decision == LoopVectorizationCostModel::CM_Compressed;
bool Consecutive =
- Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+ Reverse || Compressed || Decision == LoopVectorizationCostModel::CM_Widen;
VPValue *Ptr = isa<LoadInst>(I) ? Operands[0] : Operands[1];
if (Consecutive) {
@@ -7545,11 +7554,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
- VPIRMetadata(*Load, LVer), I->getDebugLoc());
+ Compressed, VPIRMetadata(*Load, LVer),
+ I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
- Reverse, VPIRMetadata(*Store, LVer),
+ Reverse, Compressed, VPIRMetadata(*Store, LVer),
I->getDebugLoc());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bbb03fbdff7a2..26256951a9c6c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3193,6 +3193,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
/// Whether the consecutive accessed addresses are in reverse order.
bool Reverse;
+ /// Whether the consecutive accessed addresses are compressed with mask value.
+ bool Compressed;
+
/// Whether the memory access is masked.
bool IsMasked = false;
@@ -3206,12 +3209,13 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
- bool Consecutive, bool Reverse,
+ bool Consecutive, bool Reverse, bool Compressed,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
- Reverse(Reverse) {
+ Reverse(Reverse), Compressed(Compressed) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+ assert((Consecutive || !Compressed) && "Compressed implies consecutive");
assert(isa<VPVectorEndPointerRecipe>(getAddr()) ||
!Reverse &&
"Reversed acccess without VPVectorEndPointerRecipe address?");
@@ -3241,6 +3245,9 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
/// order.
bool isReverse() const { return Reverse; }
+ /// Return whether the consecutive loaded/stored addresses are compressed.
+ bool isCompressed() const { return Compressed; }
+
/// Return the address accessed by this recipe.
VPValue *getAddr() const { return getOperand(0); }
@@ -3274,18 +3281,18 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
public VPValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse,
+ bool Consecutive, bool Reverse, bool Compressed,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
- Reverse, Metadata, DL),
+ Reverse, Compressed, Metadata, DL),
VPValue(this, &Load) {
setMask(Mask);
}
VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
- getMask(), Consecutive, Reverse, *this,
- getDebugLoc());
+ getMask(), Consecutive, Reverse, Compressed,
+ *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
@@ -3316,8 +3323,8 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
- {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
- L.getDebugLoc()),
+ {Addr, &EVL}, L.isConsecutive(), L.isReverse(),
+ L.isCompressed(), L, L.getDebugLoc()),
VPValue(this, &getIngredient()) {
setMask(Mask);
}
@@ -3355,16 +3362,16 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
VPValue *Mask, bool Consecutive, bool Reverse,
- const VPIRMetadata &Metadata, DebugLoc DL)
+ bool Compressed, const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
- Consecutive, Reverse, Metadata, DL) {
+ Consecutive, Reverse, Compressed, Metadata, DL) {
setMask(Mask);
}
VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
- Reverse, *this, getDebugLoc());
+ Reverse, Compressed, *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
@@ -3399,7 +3406,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
VPValue *Mask)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
{Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
- S.isReverse(), S, S.getDebugLoc()) {
+ S.isReverse(), S.isCompressed(), S,
+ S.getDebugLoc()) {
setMask(Mask);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 80cd112dbcd8a..0b0bd63ee2b28 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3565,8 +3565,12 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
InstructionCost Cost = 0;
if (IsMasked) {
- Cost +=
- Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind);
+ Cost += Compressed
+ ? Ctx.TTI.getExpandCompressMemoryOpCost(Opcode, Ty,
+ /*VariableMask*/ true,
+ Alignment, Ctx.CostKind)
+ : Ctx.TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS,
+ Ctx.CostKind);
} else {
TTI::OperandValueInfo OpInfo = Ctx.getOperandInfo(
isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(this) ? getOperand(0)
@@ -3603,9 +3607,13 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateMaskedGather(DataTy, Addr, Alignment, Mask, nullptr,
"wide.masked.gather");
} else if (Mask) {
- NewLI =
- Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
- PoisonValue::get(DataTy), "wide.masked.load");
+ NewLI = Compressed
+ ? Builder.CreateMaskedExpandLoad(DataTy, Addr, Alignment, Mask,
+ PoisonValue::get(DataTy),
+ "wide.masked.expand.load")
+ : Builder.CreateMaskedLoad(DataTy, Addr, Alignment, Mask,
+ PoisonValue::get(DataTy),
+ "wide.masked.load");
} else {
NewLI = Builder.CreateAlignedLoad(DataTy, Addr, Alignment, "wide.load");
}
@@ -3732,7 +3740,10 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
if (CreateScatter)
NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
else if (Mask)
- NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+ NewSI = Compressed
+ ? Builder.CreateMaskedCompressStore(StoredVal, Addr, Alignment,
+ Mask)
+ : Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
else
NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
applyMetadata(*NewSI);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 48bd697397f41..cdfbc531ebfa6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -91,13 +91,14 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/, VPIRMetadata(*Load),
- Ingredient.getDebugLoc());
+ false /*Consecutive*/, false /*Reverse*/, false /*Compressed*/,
+ VPIRMetadata(*Load), Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
- VPIRMetadata(*Store), Ingredient.getDebugLoc());
+ false /*Compressed*/, VPIRMetadata(*Store),
+ Ingredient.getDebugLoc());
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
@@ -4207,7 +4208,7 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
auto *LI = cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
auto *L = new VPWidenLoadRecipe(
*LI, LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
- /*Reverse=*/false, {}, LoadGroup->getDebugLoc());
+ /*Reverse=*/false, /*Compressed=*/false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
NarrowedOps.insert(L);
return L;
@@ -4344,7 +4345,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
auto *S = new VPWidenStoreRecipe(
*SI, StoreGroup->getAddr(), Res, nullptr, /*Consecutive=*/true,
- /*Reverse=*/false, {}, StoreGroup->getDebugLoc());
+ /*Reverse=*/false, /*Compressed=*/false, {}, StoreGroup->getDebugLoc());
S->insertBefore(StoreGroup);
StoreGroup->eraseFromParent();
}
``````````
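For reference, a rough scalar model of the semantics of the two intrinsics emitted by the new `Compressed` path (the templates, fixed-size arrays, and names below are illustrative only, not part of the patch): unlike an ordinary masked load/store, the memory index advances only for enabled lanes, so the active elements occupy consecutive memory locations.

```cpp
#include <cstddef>

// Scalar reference for llvm.masked.expandload: reads consecutive elements
// from memory and scatters them into the active lanes; inactive lanes keep
// their previous contents (the passthru operand in IR).
template <typename T, size_t VF>
void expandload_ref(const T *ptr, const bool (&mask)[VF], T (&out)[VF]) {
  size_t k = 0;
  for (size_t lane = 0; lane < VF; ++lane)
    if (mask[lane])
      out[lane] = ptr[k++];
}

// Scalar reference for llvm.masked.compressstore: packs the active lanes of
// the vector into consecutive memory locations.
template <typename T, size_t VF>
void compressstore_ref(const T (&val)[VF], T *ptr, const bool (&mask)[VF]) {
  size_t k = 0;
  for (size_t lane = 0; lane < VF; ++lane)
    if (mask[lane])
      ptr[k++] = val[lane];
}
```

This lane packing is also why the recipe asserts `Consecutive || !Compressed` and why the cost of such accesses is queried via `getExpandCompressMemoryOpCost` rather than `getMaskedMemoryOpCost` in the diff above.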
https://github.com/llvm/llvm-project/pull/166956