[llvm] [WIP][LV][VPlan] For reverse iterative continuous load and store access, use unit load/store (PR #130032)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 02:29:57 PST 2025
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/130032
>From 344c2e105eff990a77370829b0544519051e3ef7 Mon Sep 17 00:00:00 2001
From: "Liqin.Weng" <liqin.weng at spacemit.com>
Date: Thu, 6 Mar 2025 17:37:15 +0800
Subject: [PATCH] [LV][VPlan] For reverse iterative continuous load and store
access, use unit load/store
---
.../Transforms/Vectorize/LoopVectorize.cpp | 9 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 86 ++++++++++---------
2 files changed, 52 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c447fa4843591..0b155c95f0221 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7487,6 +7487,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
// comparing against the legacy cost isn't desirable.
if (isa<VPPartialReductionRecipe>(&R))
return true;
+
+ // The VPlan-based cost model may calculate the cost of strided load/store
+ // which can't be modeled in the legacy cost model.
+ if (isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(&R))
+ if (cast<VPWidenMemoryRecipe>(&R)->isReverse())
+ return true;
+
if (Instruction *UI = GetInstructionForCost(&R))
SeenInstrs.insert(UI);
}
@@ -8344,7 +8351,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
VPSingleDefRecipe *VectorPtr;
- if (Reverse) {
+ if (Reverse && !CM.foldTailWithEVL()) {
// When folding the tail, we may compute an address that we don't in the
// original scalar loop and it may not be inbounds. Drop Inbounds in that
// case.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e9f50e88867b2..93146eb83295a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2640,17 +2640,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
- Value *EVL, const Twine &Name) {
- VectorType *ValTy = cast<VectorType>(Operand->getType());
- Value *AllTrueMask =
- Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
- return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
- {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
auto *LI = cast<LoadInst>(&Ingredient);
@@ -2665,19 +2654,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
Value *EVL = State.get(getEVL(), VPLane(0));
Value *Addr = State.get(getAddr(), !CreateGather);
Value *Mask = nullptr;
- if (VPValue *VPMask = getMask()) {
+ if (VPValue *VPMask = getMask())
Mask = State.get(VPMask);
- if (isReverse())
- Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
- } else {
+ else
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
- }
if (CreateGather) {
NewLI =
Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
nullptr, "wide.masked.gather");
} else {
+ if (isReverse()) {
+ auto *EltTy = DataTy->getElementType();
+ // if (EltTy->getScalarSizeInBits() !=
+ // EVL->getType()->getScalarSizeInBits())
+ // EVL = ConstantInt::getSigned(EVL->getType(),
+ // static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+ auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+ Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+ Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+ }
VectorBuilder VBuilder(Builder);
VBuilder.setEVL(EVL).setMask(Mask);
NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2686,10 +2682,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
NewLI->addParamAttr(
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
State.addMetadata(NewLI, LI);
- Instruction *Res = NewLI;
- if (isReverse())
- Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
- State.set(this, Res);
+ State.set(this, NewLI);
}
InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
@@ -2707,14 +2700,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
unsigned AS =
getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
- InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
- Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
- if (!Reverse)
- return Cost;
+ // if (!Reverse)
+ return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+ AS, Ctx.CostKind);
- return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
- cast<VectorType>(Ty), {}, Ctx.CostKind,
- 0);
+ // return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+ // getAddr()->getUnderlyingValue(), false,
+ // Alignment, Ctx.CostKind);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2775,7 +2767,8 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
auto *SI = cast<StoreInst>(&Ingredient);
-
+ Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+ auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
VPValue *StoredValue = getStoredValue();
bool CreateScatter = !isConsecutive();
const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2786,22 +2779,32 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
CallInst *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue);
Value *EVL = State.get(getEVL(), VPLane(0));
- if (isReverse())
- StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
Value *Mask = nullptr;
- if (VPValue *VPMask = getMask()) {
+ if (VPValue *VPMask = getMask())
Mask = State.get(VPMask);
- if (isReverse())
- Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
- } else {
+ else
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
- }
+
Value *Addr = State.get(getAddr(), !CreateScatter);
if (CreateScatter) {
NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
Intrinsic::vp_scatter,
{StoredVal, Addr, Mask, EVL});
} else {
+ if (isReverse()) {
+ auto *EltTy = DataTy->getElementType();
+ // FIXME: We may not need to handle the size mismatch here; InstCombine
+ // will deal with the offset type. Disabled code:
+ // if (EltTy->getScalarSizeInBits() != EVL->getType()->getScalarSizeInBits())
+ // EVL = ConstantInt::getSigned(EVL->getType(),
+ // static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+ auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+ // Value *Offset =
+ // Builder.CreateSub(State.Builder.getIntN(EVL->getType()->getScalarSizeInBits(),
+ // 1), EVL);
+ Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+ Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+ }
VectorBuilder VBuilder(Builder);
VBuilder.setEVL(EVL).setMask(Mask);
NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2828,14 +2831,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
unsigned AS =
getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
- InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
- Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
- if (!Reverse)
- return Cost;
+ // if (!Reverse)
+ return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+ AS, Ctx.CostKind);
- return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
- cast<VectorType>(Ty), {}, Ctx.CostKind,
- 0);
+ // return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+ // getAddr()->getUnderlyingValue(), false,
+ // Alignment, Ctx.CostKind);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
More information about the llvm-commits
mailing list