[llvm] [SandboxVec][BottomUpVec] Separate vectorization decisions from code generation (PR #127727)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 17:00:36 PST 2025
https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/127727
>From d8d9eb17479ca0a17697574df5e902d5bf3af703 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Tue, 4 Feb 2025 21:44:49 -0800
Subject: [PATCH] [SandboxVec][BottomUpVec] Separate vectorization decisions
from code generation
Up until now the generation of vector instructions was taking place during the
top-down post-order traversal of vectorizeRec(). The issue with this approach
is that the vector instructions emitted during the traversal can be reordered
by the scheduler, making it challenging to place them without breaking the
def-before-uses rule.
With this patch we separate the vectorization decisions (done in
`vectorizeRec()`) from the code generation phase (`emitVectors()`).
The vectorization decisions as stored in the `Actions` vector and are used
by `emitVectors()` to drive code generation.
---
.../Vectorize/SandboxVectorizer/InstrMaps.h | 67 +++--
.../Vectorize/SandboxVectorizer/Legality.h | 33 +--
.../SandboxVectorizer/Passes/BottomUpVec.h | 30 +-
.../Vectorize/SandboxVectorizer/InstrMaps.cpp | 13 +
.../SandboxVectorizer/Passes/BottomUpVec.cpp | 268 +++++++++++-------
.../SandboxVectorizer/bottomup_basic.ll | 34 +++
.../Transforms/SandboxVectorizer/scheduler.ll | 10 +-
.../SandboxVectorizer/InstrMapsTest.cpp | 77 ++---
.../SandboxVectorizer/LegalityTest.cpp | 26 +-
9 files changed, 346 insertions(+), 212 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
index 9bdf940fc77b7..4385df518a111 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
@@ -23,57 +23,54 @@
namespace llvm::sandboxir {
+class LegalityResult;
+
+struct Action {
+ unsigned Idx = 0;
+ const LegalityResult *LegalityRes = nullptr;
+ SmallVector<Value *, 4> Bndl;
+ SmallVector<Value *> UserBndl;
+ unsigned Depth;
+ SmallVector<Action *> Operands;
+ Value *Vec = nullptr;
+ Action(const LegalityResult *LR, ArrayRef<Value *> B, ArrayRef<Value *> UB,
+ unsigned Depth)
+ : LegalityRes(LR), Bndl(B), UserBndl(UB), Depth(Depth) {}
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const;
+ friend raw_ostream &operator<<(raw_ostream &OS, const Action &A) {
+ A.print(OS);
+ return OS;
+ }
+#endif // NDEBUG
+};
+
/// Maps the original instructions to the vectorized instrs and the reverse.
/// For now an original instr can only map to a single vector.
class InstrMaps {
/// A map from the original values that got combined into vectors, to the
- /// vector value(s).
- DenseMap<Value *, Value *> OrigToVectorMap;
- /// A map from the vector value to a map of the original value to its lane.
+ /// vectorization Action.
+ DenseMap<Value *, Action *> OrigToVectorMap;
+ /// A map from the vec Action to a map of the original value to its lane.
/// Please note that for constant vectors, there may multiple original values
/// with the same lane, as they may be coming from vectorizing different
/// original values.
- DenseMap<Value *, DenseMap<Value *, unsigned>> VectorToOrigLaneMap;
- Context &Ctx;
+ DenseMap<Action *, DenseMap<Value *, unsigned>> VectorToOrigLaneMap;
std::optional<Context::CallbackID> EraseInstrCB;
-private:
- void notifyEraseInstr(Value *V) {
- // We don't know if V is an original or a vector value.
- auto It = OrigToVectorMap.find(V);
- if (It != OrigToVectorMap.end()) {
- // V is an original value.
- // Remove it from VectorToOrigLaneMap.
- Value *Vec = It->second;
- VectorToOrigLaneMap[Vec].erase(V);
- // Now erase V from OrigToVectorMap.
- OrigToVectorMap.erase(It);
- } else {
- // V is a vector value.
- // Go over the original values it came from and remove them from
- // OrigToVectorMap.
- for (auto [Orig, Lane] : VectorToOrigLaneMap[V])
- OrigToVectorMap.erase(Orig);
- // Now erase V from VectorToOrigLaneMap.
- VectorToOrigLaneMap.erase(V);
- }
- }
-
public:
- InstrMaps(Context &Ctx) : Ctx(Ctx) {
- EraseInstrCB = Ctx.registerEraseInstrCallback(
- [this](Instruction *I) { notifyEraseInstr(I); });
- }
- ~InstrMaps() { Ctx.unregisterEraseInstrCallback(*EraseInstrCB); }
+ InstrMaps() = default;
+ ~InstrMaps() = default;
/// \Returns the vector value that we got from vectorizing \p Orig, or
/// nullptr if not found.
- Value *getVectorForOrig(Value *Orig) const {
+ Action *getVectorForOrig(Value *Orig) const {
auto It = OrigToVectorMap.find(Orig);
return It != OrigToVectorMap.end() ? It->second : nullptr;
}
/// \Returns the lane of \p Orig before it got vectorized into \p Vec, or
/// nullopt if not found.
- std::optional<unsigned> getOrigLane(Value *Vec, Value *Orig) const {
+ std::optional<unsigned> getOrigLane(Action *Vec, Value *Orig) const {
auto It1 = VectorToOrigLaneMap.find(Vec);
if (It1 == VectorToOrigLaneMap.end())
return std::nullopt;
@@ -84,7 +81,7 @@ class InstrMaps {
return It2->second;
}
/// Update the map to reflect that \p Origs got vectorized into \p Vec.
- void registerVector(ArrayRef<Value *> Origs, Value *Vec) {
+ void registerVector(ArrayRef<Value *> Origs, Action *Vec) {
auto &OrigToLaneMap = VectorToOrigLaneMap[Vec];
unsigned Lane = 0;
for (Value *Orig : Origs) {
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 132b12a7b4e6c..bc2942f87adcf 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -17,6 +17,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h"
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h"
namespace llvm::sandboxir {
@@ -206,22 +207,22 @@ class Widen final : public LegalityResult {
class DiamondReuse final : public LegalityResult {
friend class LegalityAnalysis;
- Value *Vec;
- DiamondReuse(Value *Vec)
+ Action *Vec;
+ DiamondReuse(Action *Vec)
: LegalityResult(LegalityResultID::DiamondReuse), Vec(Vec) {}
public:
static bool classof(const LegalityResult *From) {
return From->getSubclassID() == LegalityResultID::DiamondReuse;
}
- Value *getVector() const { return Vec; }
+ Action *getVector() const { return Vec; }
};
class DiamondReuseWithShuffle final : public LegalityResult {
friend class LegalityAnalysis;
- Value *Vec;
+ Action *Vec;
ShuffleMask Mask;
- DiamondReuseWithShuffle(Value *Vec, const ShuffleMask &Mask)
+ DiamondReuseWithShuffle(Action *Vec, const ShuffleMask &Mask)
: LegalityResult(LegalityResultID::DiamondReuseWithShuffle), Vec(Vec),
Mask(Mask) {}
@@ -229,7 +230,7 @@ class DiamondReuseWithShuffle final : public LegalityResult {
static bool classof(const LegalityResult *From) {
return From->getSubclassID() == LegalityResultID::DiamondReuseWithShuffle;
}
- Value *getVector() const { return Vec; }
+ Action *getVector() const { return Vec; }
const ShuffleMask &getMask() const { return Mask; }
};
@@ -250,18 +251,18 @@ class CollectDescr {
/// Describes how to get a value element. If the value is a vector then it
/// also provides the index to extract it from.
class ExtractElementDescr {
- Value *V;
+ PointerUnion<Action *, Value *> V = nullptr;
/// The index in `V` that the value can be extracted from.
- /// This is nullopt if we need to use `V` as a whole.
- std::optional<int> ExtractIdx;
+ int ExtractIdx = 0;
public:
- ExtractElementDescr(Value *V, int ExtractIdx)
+ ExtractElementDescr(Action *V, int ExtractIdx)
: V(V), ExtractIdx(ExtractIdx) {}
- ExtractElementDescr(Value *V) : V(V), ExtractIdx(std::nullopt) {}
- Value *getValue() const { return V; }
- bool needsExtract() const { return ExtractIdx.has_value(); }
- int getExtractIdx() const { return *ExtractIdx; }
+ ExtractElementDescr(Value *V) : V(V) {}
+ Action *getValue() const { return cast<Action *>(V); }
+ Value *getScalar() const { return cast<Value *>(V); }
+ bool needsExtract() const { return isa<Action *>(V); }
+ int getExtractIdx() const { return ExtractIdx; }
};
using DescrVecT = SmallVector<ExtractElementDescr, 4>;
@@ -272,11 +273,11 @@ class CollectDescr {
: Descrs(std::move(Descrs)) {}
/// If all elements come from a single vector input, then return that vector
/// and also the shuffle mask required to get them in order.
- std::optional<std::pair<Value *, ShuffleMask>> getSingleInput() const {
+ std::optional<std::pair<Action *, ShuffleMask>> getSingleInput() const {
const auto &Descr0 = *Descrs.begin();
- Value *V0 = Descr0.getValue();
if (!Descr0.needsExtract())
return std::nullopt;
+ auto *V0 = Descr0.getValue();
ShuffleMask::IndicesVecT MaskIndices;
MaskIndices.push_back(Descr0.getExtractIdx());
for (const auto &Descr : drop_begin(Descrs)) {
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
index daf6499213d48..b28e9948d6f55 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
@@ -58,9 +58,33 @@ class BottomUpVec final : public RegionPass {
/// function helps collect these instructions (along with the pointer operands
/// for loads/stores) so that they can be cleaned up later.
void collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl);
- /// Recursively try to vectorize \p Bndl and its operands.
- Value *vectorizeRec(ArrayRef<Value *> Bndl, ArrayRef<Value *> UserBndl,
- unsigned Depth);
+
+ /// Helper class describing how(if) to vectorize the code.
+ class ActionsVector {
+ private:
+ SmallVector<std::unique_ptr<Action>, 16> Actions;
+
+ public:
+ auto begin() const { return Actions.begin(); }
+ auto end() const { return Actions.end(); }
+ void push_back(std::unique_ptr<Action> &&ActPtr) {
+ ActPtr->Idx = Actions.size();
+ Actions.push_back(std::move(ActPtr));
+ }
+ void clear() { Actions.clear(); }
+#ifndef NDEBUG
+ void print(raw_ostream &OS) const;
+ void dump() const;
+#endif // NDEBUG
+ };
+ ActionsVector Actions;
+ /// Recursively try to vectorize \p Bndl and its operands. This populates the
+ /// `Actions` vector.
+ Action *vectorizeRec(ArrayRef<Value *> Bndl, ArrayRef<Value *> UserBndl,
+ unsigned Depth);
+ /// Generate vector instructions based on `Actions` and return the last vector
+ /// created.
+ Value *emitVectors();
/// Entry point for vectorization starting from \p Seeds.
bool tryVectorize(ArrayRef<Value *> Seeds);
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp
index 4df4829a04c41..37f1ec450f2eb 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/InstrMaps.cpp
@@ -8,10 +8,23 @@
#include "llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
namespace llvm::sandboxir {
#ifndef NDEBUG
+void Action::print(raw_ostream &OS) const {
+ OS << Idx << ". " << *LegalityRes << " Depth:" << Depth << "\n";
+ OS.indent(2) << "Bndl:\n";
+ for (Value *V : Bndl)
+ OS.indent(4) << *V << "\n";
+ OS.indent(2) << "UserBndl:\n";
+ for (Value *V : UserBndl)
+ OS.indent(4) << *V << "\n";
+}
+
+void Action::dump() const { print(dbgs()); }
+
void InstrMaps::dump() const {
print(dbgs());
dbgs() << "\n";
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index d57732090dcd6..14438181f2602 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -156,12 +156,7 @@ Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl,
// TODO: Propagate debug info.
};
- auto *VecI = CreateVectorInstr(Bndl, Operands);
- if (VecI != nullptr) {
- Change = true;
- IMaps->registerVector(Bndl, VecI);
- }
- return VecI;
+ return CreateVectorInstr(Bndl, Operands);
}
void BottomUpVec::tryEraseDeadInstrs() {
@@ -266,135 +261,196 @@ void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) {
}
}
-Value *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl,
- ArrayRef<Value *> UserBndl, unsigned Depth) {
- Value *NewVec = nullptr;
- auto *UserBB = !UserBndl.empty()
- ? cast<Instruction>(UserBndl.front())->getParent()
- : cast<Instruction>(Bndl[0])->getParent();
+Action *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl,
+ ArrayRef<Value *> UserBndl, unsigned Depth) {
const auto &LegalityRes = Legality->canVectorize(Bndl);
+ auto ActionPtr =
+ std::make_unique<Action>(&LegalityRes, Bndl, UserBndl, Depth);
+ SmallVector<Action *> Operands;
switch (LegalityRes.getSubclassID()) {
case LegalityResultID::Widen: {
auto *I = cast<Instruction>(Bndl[0]);
- SmallVector<Value *, 2> VecOperands;
switch (I->getOpcode()) {
case Instruction::Opcode::Load:
- // Don't recurse towards the pointer operand.
- VecOperands.push_back(cast<LoadInst>(I)->getPointerOperand());
break;
case Instruction::Opcode::Store: {
// Don't recurse towards the pointer operand.
- auto *VecOp = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1);
- VecOperands.push_back(VecOp);
- VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand());
+ Action *OpA = vectorizeRec(getOperand(Bndl, 0), Bndl, Depth + 1);
+ Operands.push_back(OpA);
break;
}
default:
// Visit all operands.
for (auto OpIdx : seq<unsigned>(I->getNumOperands())) {
- auto *VecOp = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1);
- VecOperands.push_back(VecOp);
+ Action *OpA = vectorizeRec(getOperand(Bndl, OpIdx), Bndl, Depth + 1);
+ Operands.push_back(OpA);
}
break;
}
- NewVec = createVectorInstr(Bndl, VecOperands);
-
- // Collect any potentially dead scalar instructions, including the original
- // scalars and pointer operands of loads/stores.
- if (NewVec != nullptr)
- collectPotentiallyDeadInstrs(Bndl);
+ // Update the maps to mark Bndl as "vectorized".
+ IMaps->registerVector(Bndl, ActionPtr.get());
break;
}
- case LegalityResultID::DiamondReuse: {
- NewVec = cast<DiamondReuse>(LegalityRes).getVector();
+ case LegalityResultID::DiamondReuse:
+ case LegalityResultID::DiamondReuseWithShuffle:
+ case LegalityResultID::DiamondReuseMultiInput:
+ case LegalityResultID::Pack:
break;
}
- case LegalityResultID::DiamondReuseWithShuffle: {
- auto *VecOp = cast<DiamondReuseWithShuffle>(LegalityRes).getVector();
- const ShuffleMask &Mask =
- cast<DiamondReuseWithShuffle>(LegalityRes).getMask();
- NewVec = createShuffle(VecOp, Mask, UserBB);
- assert(NewVec->getType() == VecOp->getType() &&
- "Expected same type! Bad mask ?");
- break;
+ // Create actions in post-order.
+ ActionPtr->Operands = std::move(Operands);
+ auto *Action = ActionPtr.get();
+ Actions.push_back(std::move(ActionPtr));
+ return Action;
+}
+
+#ifndef NDEBUG
+void BottomUpVec::ActionsVector::print(raw_ostream &OS) const {
+ for (auto [Idx, Action] : enumerate(Actions)) {
+ Action->print(OS);
+ OS << "\n";
}
- case LegalityResultID::DiamondReuseMultiInput: {
- const auto &Descr =
- cast<DiamondReuseMultiInput>(LegalityRes).getCollectDescr();
- Type *ResTy = VecUtils::getWideType(Bndl[0]->getType(), Bndl.size());
+}
+void BottomUpVec::ActionsVector::dump() const { print(dbgs()); }
+#endif // NDEBUG
+
+Value *BottomUpVec::emitVectors() {
+ Value *NewVec = nullptr;
+ for (const auto &ActionPtr : Actions) {
+ ArrayRef<Value *> Bndl = ActionPtr->Bndl;
+ ArrayRef<Value *> UserBndl = ActionPtr->UserBndl;
+ const LegalityResult &LegalityRes = *ActionPtr->LegalityRes;
+ unsigned Depth = ActionPtr->Depth;
+ auto *UserBB = !UserBndl.empty()
+ ? cast<Instruction>(UserBndl.front())->getParent()
+ : cast<Instruction>(Bndl[0])->getParent();
- // TODO: Try to get WhereIt without creating a vector.
- SmallVector<Value *, 4> DescrInstrs;
- for (const auto &ElmDescr : Descr.getDescrs()) {
- if (auto *I = dyn_cast<Instruction>(ElmDescr.getValue()))
- DescrInstrs.push_back(I);
+ switch (LegalityRes.getSubclassID()) {
+ case LegalityResultID::Widen: {
+ auto *I = cast<Instruction>(Bndl[0]);
+ SmallVector<Value *, 2> VecOperands;
+ switch (I->getOpcode()) {
+ case Instruction::Opcode::Load:
+ VecOperands.push_back(cast<LoadInst>(I)->getPointerOperand());
+ break;
+ case Instruction::Opcode::Store: {
+ VecOperands.push_back(ActionPtr->Operands[0]->Vec);
+ VecOperands.push_back(cast<StoreInst>(I)->getPointerOperand());
+ break;
+ }
+ default:
+ // Visit all operands.
+ for (Action *OpA : ActionPtr->Operands) {
+ auto *VecOp = OpA->Vec;
+ VecOperands.push_back(VecOp);
+ }
+ break;
+ }
+ NewVec = createVectorInstr(ActionPtr->Bndl, VecOperands);
+ // Collect any potentially dead scalar instructions, including the
+ // original scalars and pointer operands of loads/stores.
+ if (NewVec != nullptr)
+ collectPotentiallyDeadInstrs(Bndl);
+ break;
+ }
+ case LegalityResultID::DiamondReuse: {
+ NewVec = cast<DiamondReuse>(LegalityRes).getVector()->Vec;
+ break;
+ }
+ case LegalityResultID::DiamondReuseWithShuffle: {
+ auto *VecOp = cast<DiamondReuseWithShuffle>(LegalityRes).getVector()->Vec;
+ const ShuffleMask &Mask =
+ cast<DiamondReuseWithShuffle>(LegalityRes).getMask();
+ NewVec = createShuffle(VecOp, Mask, UserBB);
+ assert(NewVec->getType() == VecOp->getType() &&
+ "Expected same type! Bad mask ?");
+ break;
}
- BasicBlock::iterator WhereIt =
- getInsertPointAfterInstrs(DescrInstrs, UserBB);
+ case LegalityResultID::DiamondReuseMultiInput: {
+ const auto &Descr =
+ cast<DiamondReuseMultiInput>(LegalityRes).getCollectDescr();
+ Type *ResTy = VecUtils::getWideType(Bndl[0]->getType(), Bndl.size());
- Value *LastV = PoisonValue::get(ResTy);
- unsigned Lane = 0;
- for (const auto &ElmDescr : Descr.getDescrs()) {
- Value *VecOp = ElmDescr.getValue();
- Context &Ctx = VecOp->getContext();
- Value *ValueToInsert;
- if (ElmDescr.needsExtract()) {
- ConstantInt *IdxC =
- ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx());
- ValueToInsert = ExtractElementInst::create(VecOp, IdxC, WhereIt,
- VecOp->getContext(), "VExt");
- } else {
- ValueToInsert = VecOp;
+ // TODO: Try to get WhereIt without creating a vector.
+ SmallVector<Value *, 4> DescrInstrs;
+ for (const auto &ElmDescr : Descr.getDescrs()) {
+ auto *V = ElmDescr.needsExtract() ? ElmDescr.getValue()->Vec
+ : ElmDescr.getScalar();
+ if (auto *I = dyn_cast<Instruction>(V))
+ DescrInstrs.push_back(I);
}
- auto NumLanesToInsert = VecUtils::getNumLanes(ValueToInsert);
- if (NumLanesToInsert == 1) {
- // If we are inserting a scalar element then we need a single insert.
- // %VIns = insert %DstVec, %SrcScalar, Lane
- ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane);
- LastV = InsertElementInst::create(LastV, ValueToInsert, LaneC, WhereIt,
- Ctx, "VIns");
- } else {
- // If we are inserting a vector element then we need to extract and
- // insert each vector element one by one with a chain of extracts and
- // inserts, for example:
- // %VExt0 = extract %SrcVec, 0
- // %VIns0 = insert %DstVec, %Vect0, Lane + 0
- // %VExt1 = extract %SrcVec, 1
- // %VIns1 = insert %VIns0, %Vect0, Lane + 1
- for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) {
- auto *ExtrIdxC = ConstantInt::get(Type::getInt32Ty(Ctx), LnCnt);
- auto *ExtrI = ExtractElementInst::create(ValueToInsert, ExtrIdxC,
- WhereIt, Ctx, "VExt");
- unsigned InsLane = Lane + LnCnt;
- auto *InsLaneC = ConstantInt::get(Type::getInt32Ty(Ctx), InsLane);
- LastV = InsertElementInst::create(LastV, ExtrI, InsLaneC, WhereIt,
- Ctx, "VIns");
+ BasicBlock::iterator WhereIt =
+ getInsertPointAfterInstrs(DescrInstrs, UserBB);
+
+ Value *LastV = PoisonValue::get(ResTy);
+ Context &Ctx = LastV->getContext();
+ unsigned Lane = 0;
+ for (const auto &ElmDescr : Descr.getDescrs()) {
+ Value *VecOp = nullptr;
+ Value *ValueToInsert;
+ if (ElmDescr.needsExtract()) {
+ VecOp = ElmDescr.getValue()->Vec;
+ ConstantInt *IdxC =
+ ConstantInt::get(Type::getInt32Ty(Ctx), ElmDescr.getExtractIdx());
+ ValueToInsert = ExtractElementInst::create(
+ VecOp, IdxC, WhereIt, VecOp->getContext(), "VExt");
+ } else {
+ ValueToInsert = ElmDescr.getScalar();
+ }
+ auto NumLanesToInsert = VecUtils::getNumLanes(ValueToInsert);
+ if (NumLanesToInsert == 1) {
+ // If we are inserting a scalar element then we need a single insert.
+ // %VIns = insert %DstVec, %SrcScalar, Lane
+ ConstantInt *LaneC = ConstantInt::get(Type::getInt32Ty(Ctx), Lane);
+ LastV = InsertElementInst::create(LastV, ValueToInsert, LaneC,
+ WhereIt, Ctx, "VIns");
+ } else {
+ // If we are inserting a vector element then we need to extract and
+ // insert each vector element one by one with a chain of extracts and
+ // inserts, for example:
+ // %VExt0 = extract %SrcVec, 0
+ // %VIns0 = insert %DstVec, %Vect0, Lane + 0
+ // %VExt1 = extract %SrcVec, 1
+ // %VIns1 = insert %VIns0, %Vect0, Lane + 1
+ for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) {
+ auto *ExtrIdxC = ConstantInt::get(Type::getInt32Ty(Ctx), LnCnt);
+ auto *ExtrI = ExtractElementInst::create(ValueToInsert, ExtrIdxC,
+ WhereIt, Ctx, "VExt");
+ unsigned InsLane = Lane + LnCnt;
+ auto *InsLaneC = ConstantInt::get(Type::getInt32Ty(Ctx), InsLane);
+ LastV = InsertElementInst::create(LastV, ExtrI, InsLaneC, WhereIt,
+ Ctx, "VIns");
+ }
}
+ Lane += NumLanesToInsert;
}
- Lane += NumLanesToInsert;
+ NewVec = LastV;
+ break;
+ }
+ case LegalityResultID::Pack: {
+ // If we can't vectorize the seeds then just return.
+ if (Depth == 0)
+ return nullptr;
+ NewVec = createPack(Bndl, UserBB);
+ break;
+ }
+ }
+ if (NewVec != nullptr) {
+ Change = true;
+ ActionPtr->Vec = NewVec;
}
- NewVec = LastV;
- break;
- }
- case LegalityResultID::Pack: {
- // If we can't vectorize the seeds then just return.
- if (Depth == 0)
- return nullptr;
- NewVec = createPack(Bndl, UserBB);
- break;
- }
- }
#ifndef NDEBUG
- if (AlwaysVerify) {
- // This helps find broken IR by constantly verifying the function. Note that
- // this is very expensive and should only be used for debugging.
- Instruction *I0 = isa<Instruction>(Bndl[0])
- ? cast<Instruction>(Bndl[0])
- : cast<Instruction>(UserBndl[0]);
- assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) &&
- "Broken function!");
+ if (AlwaysVerify) {
+ // This helps find broken IR by constantly verifying the function. Note
+ // that this is very expensive and should only be used for debugging.
+ Instruction *I0 = isa<Instruction>(Bndl[0])
+ ? cast<Instruction>(Bndl[0])
+ : cast<Instruction>(UserBndl[0]);
+ assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) &&
+ "Broken function!");
+ }
+#endif // NDEBUG
}
-#endif
return NewVec;
}
@@ -402,7 +458,9 @@ bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) {
Change = false;
DeadInstrCandidates.clear();
Legality->clear();
+ Actions.clear();
vectorizeRec(Bndl, {}, /*Depth=*/0);
+ emitVectors();
tryEraseDeadInstrs();
return Change;
}
@@ -411,7 +469,7 @@ bool BottomUpVec::runOnRegion(Region &Rgn, const Analyses &A) {
const auto &SeedSlice = Rgn.getAux();
assert(SeedSlice.size() >= 2 && "Bad slice!");
Function &F = *SeedSlice[0]->getParent()->getParent();
- IMaps = std::make_unique<InstrMaps>(F.getContext());
+ IMaps = std::make_unique<InstrMaps>();
Legality = std::make_unique<LegalityAnalysis>(
A.getAA(), A.getScalarEvolution(), F.getParent()->getDataLayout(),
F.getContext(), *IMaps);
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index c076c0e849fa9..fc5795708c7d8 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -352,3 +352,37 @@ define void @diamondWithConstantVector(ptr %ptr) {
store i32 %orB1, ptr %gepB1
ret void
}
+
+; Check that we don't get def-after-use errors due to wrong placement
+; of new vector instructions.
+define void @vecInstrsPlacement(ptr %ptr0) {
+; CHECK-LABEL: define void @vecInstrsPlacement(
+; CHECK-SAME: ptr [[PTR0:%.*]]) {
+; CHECK-NEXT: [[VECL2:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
+; CHECK-NEXT: [[VECL:%.*]] = load <2 x double>, ptr [[PTR0]], align 8
+; CHECK-NEXT: [[VEC2:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT: [[VEC:%.*]] = fmul <2 x double> [[VECL]], [[VECL2]]
+; CHECK-NEXT: [[VEC5:%.*]] = fadd <2 x double> [[VEC]], [[VEC2]]
+; CHECK-NEXT: store <2 x double> [[VEC5]], ptr [[PTR0]], align 8
+; CHECK-NEXT: ret void
+;
+ %ptr1 = getelementptr inbounds double, ptr %ptr0, i64 1
+ %ldA_0 = load double, ptr %ptr0
+ %ldA_1 = load double, ptr %ptr1
+
+ %ldB_0 = load double, ptr %ptr0
+ %ldB_1 = load double, ptr %ptr1
+
+ %mul0 = fmul double %ldA_0, %ldB_0
+ %mul1 = fmul double %ldA_1, %ldB_1
+
+ %mul2 = fmul double %ldA_0, %ldB_0
+ %mul3 = fmul double %ldA_1, %ldB_1
+
+ %add0 = fadd double %mul0, %mul2
+ %add1 = fadd double %mul1, %mul3
+
+ store double %add0, ptr %ptr0
+ store double %add1, ptr %ptr1
+ ret void
+}
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index 7741d8c64c8fc..5b9177ba4b3bf 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -7,17 +7,17 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
; CHECK-LABEL: define void @check_dag_scheduler_update(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) {
; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32
+; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 34
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33
; CHECK-NEXT: [[I4:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4
-; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 34
+; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr i32, ptr [[P]], i64 32
; CHECK-NEXT: [[I6:%.*]] = load i32, ptr [[ARRAYIDX18]], align 4
; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0
-; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I2]], i32 1
+; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[I6]], i32 1
; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[I4]], i32 2
-; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I6]], i32 3
-; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[P]], align 4
+; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[I2]], i32 3
; CHECK-NEXT: [[VEC:%.*]] = add nsw <4 x i32> [[PACK3]], [[VECL]]
; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[P1]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
index 5b033f0edcb02..c8fee1c24dbcb 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
@@ -53,37 +53,41 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) {
auto *VAdd0 = cast<sandboxir::BinaryOperator>(&*It++);
[[maybe_unused]] auto *Ret = cast<sandboxir::ReturnInst>(&*It++);
- sandboxir::InstrMaps IMaps(Ctx);
- // Check with empty IMaps.
- EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
- EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
- EXPECT_FALSE(IMaps.getOrigLane(Add0, Add0));
- // Check with 1 match.
- IMaps.registerVector({Add0, Add1}, VAdd0);
- EXPECT_EQ(IMaps.getVectorForOrig(Add0), VAdd0);
- EXPECT_EQ(IMaps.getVectorForOrig(Add1), VAdd0);
- EXPECT_FALSE(IMaps.getOrigLane(VAdd0, VAdd0)); // Bad Orig value
- EXPECT_FALSE(IMaps.getOrigLane(Add0, Add0)); // Bad Vector value
- EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add0), 0U);
- EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U);
- // Check when the same vector maps to different original values (which is
- // common for vector constants).
- IMaps.registerVector({Add2, Add3}, VAdd0);
- EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add2), 0U);
- EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add3), 1U);
- // Check when we register for a second time.
+ sandboxir::InstrMaps IMaps;
+ {
+ // Check with empty IMaps.
+ sandboxir::Action A(nullptr, {Add0}, {}, 0);
+ EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
+ EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
+ EXPECT_FALSE(IMaps.getOrigLane(&A, Add0));
+ }
+ {
+ // Check with 1 match.
+ sandboxir::Action A(nullptr, {Add0, Add1}, {}, 0);
+ sandboxir::Action OtherA(nullptr, {}, {}, 0);
+ IMaps.registerVector({Add0, Add1}, &A);
+ EXPECT_EQ(IMaps.getVectorForOrig(Add0), &A);
+ EXPECT_EQ(IMaps.getVectorForOrig(Add1), &A);
+ EXPECT_FALSE(IMaps.getOrigLane(&A, VAdd0)); // Bad Orig value
+ EXPECT_FALSE(IMaps.getOrigLane(&OtherA, Add0)); // Bad Vector value
+ EXPECT_EQ(*IMaps.getOrigLane(&A, Add0), 0U);
+ EXPECT_EQ(*IMaps.getOrigLane(&A, Add1), 1U);
+ }
+ {
+ // Check when the same vector maps to different original values (which is
+ // common for vector constants).
+ sandboxir::Action A(nullptr, {Add2, Add3}, {}, 0);
+ IMaps.registerVector({Add2, Add3}, &A);
+ EXPECT_EQ(*IMaps.getOrigLane(&A, Add2), 0U);
+ EXPECT_EQ(*IMaps.getOrigLane(&A, Add3), 1U);
+ }
+ {
+ // Check when we register for a second time.
+ sandboxir::Action A(nullptr, {Add2, Add3}, {}, 0);
#ifndef NDEBUG
- EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, VAdd0), ".*exists.*");
+ EXPECT_DEATH(IMaps.registerVector({Add1, Add0}, &A), ".*exists.*");
#endif // NDEBUG
- // Check callbacks: erase original instr.
- Add0->eraseFromParent();
- EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add0));
- EXPECT_EQ(*IMaps.getOrigLane(VAdd0, Add1), 1U);
- EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
- // Check callbacks: erase vector instr.
- VAdd0->eraseFromParent();
- EXPECT_FALSE(IMaps.getOrigLane(VAdd0, Add1));
- EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
+ }
}
TEST_F(InstrMapsTest, VectorLanes) {
@@ -91,7 +95,6 @@ TEST_F(InstrMapsTest, VectorLanes) {
define void @foo(<2 x i8> %v0, <2 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3) {
%vadd0 = add <2 x i8> %v0, %v1
%vadd1 = add <2 x i8> %v0, %v1
- %vadd2 = add <4 x i8> %v2, %v3
ret void
}
)IR");
@@ -103,12 +106,14 @@ define void @foo(<2 x i8> %v0, <2 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3) {
auto *VAdd0 = cast<sandboxir::BinaryOperator>(&*It++);
auto *VAdd1 = cast<sandboxir::BinaryOperator>(&*It++);
- auto *VAdd2 = cast<sandboxir::BinaryOperator>(&*It++);
- sandboxir::InstrMaps IMaps(Ctx);
+ sandboxir::InstrMaps IMaps;
- // Check that the vector lanes are calculated correctly.
- IMaps.registerVector({VAdd0, VAdd1}, VAdd2);
- EXPECT_EQ(*IMaps.getOrigLane(VAdd2, VAdd0), 0U);
- EXPECT_EQ(*IMaps.getOrigLane(VAdd2, VAdd1), 2U);
+ {
+ // Check that the vector lanes are calculated correctly.
+ sandboxir::Action A(nullptr, {VAdd0, VAdd1}, {}, 0);
+ IMaps.registerVector({VAdd0, VAdd1}, &A);
+ EXPECT_EQ(*IMaps.getOrigLane(&A, VAdd0), 0U);
+ EXPECT_EQ(*IMaps.getOrigLane(&A, VAdd1), 2U);
+ }
}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
index 15f8166b705fc..99519d17d0e8e 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -133,7 +133,7 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float
auto *Sel0 = cast<sandboxir::SelectInst>(&*It++);
auto *Sel1 = cast<sandboxir::SelectInst>(&*It++);
- llvm::sandboxir::InstrMaps IMaps(Ctx);
+ llvm::sandboxir::InstrMaps IMaps;
sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps);
const auto &Result =
Legality.canVectorize({St0, St1}, /*SkipScheduling=*/true);
@@ -285,7 +285,7 @@ define void @foo(ptr %ptr) {
auto *St0 = cast<sandboxir::StoreInst>(&*It++);
auto *St1 = cast<sandboxir::StoreInst>(&*It++);
- llvm::sandboxir::InstrMaps IMaps(Ctx);
+ llvm::sandboxir::InstrMaps IMaps;
sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps);
{
// Can vectorize St0,St1.
@@ -321,7 +321,7 @@ define void @foo() {
};
sandboxir::Context Ctx(C);
- llvm::sandboxir::InstrMaps IMaps(Ctx);
+ llvm::sandboxir::InstrMaps IMaps;
sandboxir::LegalityAnalysis Legality(*AA, *SE, DL, Ctx, IMaps);
EXPECT_TRUE(
Matches(Legality.createLegalityResult<sandboxir::Widen>(), "Widen"));
@@ -368,32 +368,34 @@ define void @foo(ptr %ptr) {
sandboxir::CollectDescr::DescrVecT Descrs;
using EEDescr = sandboxir::CollectDescr::ExtractElementDescr;
-
+ SmallVector<sandboxir::Value *> Bndl({VLd});
+ SmallVector<sandboxir::Value *> UB;
+ sandboxir::Action VLdA(nullptr, Bndl, UB, 0);
{
// Check single input, no shuffle.
- Descrs.push_back(EEDescr(VLd, 0));
- Descrs.push_back(EEDescr(VLd, 1));
+ Descrs.push_back(EEDescr(&VLdA, 0));
+ Descrs.push_back(EEDescr(&VLdA, 1));
sandboxir::CollectDescr CD(std::move(Descrs));
EXPECT_TRUE(CD.getSingleInput());
- EXPECT_EQ(CD.getSingleInput()->first, VLd);
+ EXPECT_EQ(CD.getSingleInput()->first, &VLdA);
EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(0, 1));
EXPECT_TRUE(CD.hasVectorInputs());
}
{
// Check single input, shuffle.
- Descrs.push_back(EEDescr(VLd, 1));
- Descrs.push_back(EEDescr(VLd, 0));
+ Descrs.push_back(EEDescr(&VLdA, 1));
+ Descrs.push_back(EEDescr(&VLdA, 0));
sandboxir::CollectDescr CD(std::move(Descrs));
EXPECT_TRUE(CD.getSingleInput());
- EXPECT_EQ(CD.getSingleInput()->first, VLd);
+ EXPECT_EQ(CD.getSingleInput()->first, &VLdA);
EXPECT_THAT(CD.getSingleInput()->second, testing::ElementsAre(1, 0));
EXPECT_TRUE(CD.hasVectorInputs());
}
{
// Check multiple inputs.
Descrs.push_back(EEDescr(Ld0));
- Descrs.push_back(EEDescr(VLd, 0));
- Descrs.push_back(EEDescr(VLd, 1));
+ Descrs.push_back(EEDescr(&VLdA, 0));
+ Descrs.push_back(EEDescr(&VLdA, 1));
sandboxir::CollectDescr CD(std::move(Descrs));
EXPECT_FALSE(CD.getSingleInput());
EXPECT_TRUE(CD.hasVectorInputs());
More information about the llvm-commits
mailing list