[llvm] r367208 - [NFC][ARM][ParallelDSP] Cleanup of BinOpChain
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 01:41:52 PDT 2019
Author: sam_parker
Date: Mon Jul 29 01:41:51 2019
New Revision: 367208
URL: http://llvm.org/viewvc/llvm-project?rev=367208&view=rev
Log:
[NFC][ARM][ParallelDSP] Cleanup of BinOpChain
- Remove some unused typedefs.
- Rename BinOpChain struct to MulCandidate.
- Remove the size method of MulCandidate.
- Store only the first input of the ValueList provided to
MulCandidate, as it's the only value we care about. This means we
don't have to perform any ugly (and unnecessary) iterations of the
list later on.
Modified:
llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp?rev=367208&r1=367207&r2=367208&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp Mon Jul 29 01:41:51 2019
@@ -47,38 +47,32 @@ DisableParallelDSP("disable-arm-parallel
namespace {
struct OpChain;
- struct BinOpChain;
+ struct MulCandidate;
class Reduction;
- using OpChainList = SmallVector<std::unique_ptr<BinOpChain>, 8>;
+ using MulCandList = SmallVector<std::unique_ptr<MulCandidate>, 8>;
using ReductionList = SmallVector<Reduction, 8>;
using ValueList = SmallVector<Value*, 8>;
using MemInstList = SmallVector<LoadInst*, 8>;
- using PMACPair = std::pair<BinOpChain*,BinOpChain*>;
+ using PMACPair = std::pair<MulCandidate*,MulCandidate*>;
using PMACPairList = SmallVector<PMACPair, 8>;
- using Instructions = SmallVector<Instruction*,16>;
- using MemLocList = SmallVector<MemoryLocation, 4>;
- // 'BinOpChain' holds the multiplication instructions that are candidates
+ // 'MulCandidate' holds the multiplication instructions that are candidates
// for parallel execution.
- struct BinOpChain {
+ struct MulCandidate {
Instruction *Root;
- ValueList AllValues;
- MemInstList VecLd; // List of all load instructions.
- ValueList LHS; // List of all (narrow) left hand operands.
- ValueList RHS; // List of all (narrow) right hand operands.
+ MemInstList VecLd; // Container for loads to widen.
+ Value* LHS;
+ Value* RHS;
bool Exchange = false;
bool ReadOnly = true;
- BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
- Root(I), LHS(lhs), RHS(rhs) {
- for (auto *V : LHS)
- AllValues.push_back(V);
- for (auto *V : RHS)
- AllValues.push_back(V);
- }
+ MulCandidate(Instruction *I, ValueList &lhs, ValueList &rhs) :
+ Root(I), LHS(lhs.front()), RHS(rhs.front()) { }
- unsigned size() const { return AllValues.size(); }
+ bool HasTwoLoadInputs() const {
+ return isa<LoadInst>(LHS) && isa<LoadInst>(RHS);
+ }
};
/// Represent a sequence of multiply-accumulate operations with the aim to
@@ -86,7 +80,7 @@ namespace {
class Reduction {
Instruction *Root = nullptr;
Value *Acc = nullptr;
- OpChainList Muls;
+ MulCandList Muls;
PMACPairList MulPairs;
SmallPtrSet<Instruction*, 4> Adds;
@@ -98,10 +92,10 @@ namespace {
/// Record an Add instruction that is a part of the this reduction.
void InsertAdd(Instruction *I) { Adds.insert(I); }
- /// Record a BinOpChain, rooted at a Mul instruction, that is a part of
+ /// Record a MulCandidate, rooted at a Mul instruction, that is a part of
/// this reduction.
void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) {
- Muls.push_back(make_unique<BinOpChain>(I, LHS, RHS));
+ Muls.push_back(make_unique<MulCandidate>(I, LHS, RHS));
}
/// Add the incoming accumulator value, returns true if a value had not
@@ -114,9 +108,9 @@ namespace {
return true;
}
- /// Set two BinOpChains, rooted at muls, that can be executed as a single
+ /// Set two MulCandidates, rooted at muls, that can be executed as a single
/// parallel operation.
- void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) {
+ void AddMulPair(MulCandidate *Mul0, MulCandidate *Mul1) {
MulPairs.push_back(std::make_pair(Mul0, Mul1));
}
@@ -133,11 +127,11 @@ namespace {
/// Return the set of adds that comprise the reduction.
SmallPtrSetImpl<Instruction*> &getAdds() { return Adds; }
- /// Return the BinOpChain, rooted at mul instruction, that comprise the
+ /// Return the MulCandidate, rooted at mul instruction, that comprise the
/// the reduction.
- OpChainList &getMuls() { return Muls; }
+ MulCandList &getMuls() { return Muls; }
- /// Return the BinOpChain, rooted at mul instructions, that have been
+ /// Return the MulCandidate, rooted at mul instructions, that have been
/// paired for parallel execution.
PMACPairList &getMulPairs() { return MulPairs; }
@@ -546,74 +540,57 @@ bool ARMParallelDSP::CreateParallelPairs
return false;
// Check that the muls operate directly upon sign extended loads.
- for (auto &MulChain : R.getMuls()) {
- // A mul has 2 operands, and a narrow op consist of sext and a load; thus
- // we expect at least 4 items in this operand value list.
- if (MulChain->size() < 4) {
- LLVM_DEBUG(dbgs() << "Operand list too short.\n");
+ for (auto &MulCand : R.getMuls()) {
+ if (!MulCand->HasTwoLoadInputs())
return false;
- }
- ValueList &LHS = static_cast<BinOpChain*>(MulChain.get())->LHS;
- ValueList &RHS = static_cast<BinOpChain*>(MulChain.get())->RHS;
-
- // Use +=2 to skip over the expected extend instructions.
- for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
- if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
- return false;
- }
}
- auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) {
+ auto CanPair = [&](Reduction &R, MulCandidate *PMul0, MulCandidate *PMul1) {
// The first elements of each vector should be loads with sexts. If we
// find that its two pairs of consecutive loads, then these can be
// transformed into two wider loads and the users can be replaced with
// DSP intrinsics.
- for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
- auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
- auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
- auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
- auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
-
- if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
- return false;
-
- LLVM_DEBUG(dbgs() << "Loads:\n"
- << " - " << *Ld0 << "\n"
- << " - " << *Ld1 << "\n"
- << " - " << *Ld2 << "\n"
- << " - " << *Ld3 << "\n");
-
- if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
- if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- R.AddMulPair(PMul0, PMul1);
- return true;
- } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
- LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
- PMul1->Exchange = true;
- R.AddMulPair(PMul0, PMul1);
- return true;
- }
- } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
- AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ auto Ld0 = static_cast<LoadInst*>(PMul0->LHS);
+ auto Ld1 = static_cast<LoadInst*>(PMul1->LHS);
+ auto Ld2 = static_cast<LoadInst*>(PMul0->RHS);
+ auto Ld3 = static_cast<LoadInst*>(PMul1->RHS);
+
+ LLVM_DEBUG(dbgs() << "Loads:\n"
+ << " - " << *Ld0 << "\n"
+ << " - " << *Ld1 << "\n"
+ << " - " << *Ld2 << "\n"
+ << " - " << *Ld3 << "\n");
+
+ if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+ if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
- LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
- LLVM_DEBUG(dbgs() << " and swapping muls\n");
- PMul0->Exchange = true;
- // Only the second operand can be exchanged, so swap the muls.
- R.AddMulPair(PMul1, PMul0);
+ R.AddMulPair(PMul0, PMul1);
+ return true;
+ } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld2 and Ld3\n");
+ PMul1->Exchange = true;
+ R.AddMulPair(PMul0, PMul1);
return true;
}
+ } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+ AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+ LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+ LLVM_DEBUG(dbgs() << " exchanging Ld0 and Ld1\n");
+ LLVM_DEBUG(dbgs() << " and swapping muls\n");
+ PMul0->Exchange = true;
+ // Only the second operand can be exchanged, so swap the muls.
+ R.AddMulPair(PMul1, PMul0);
+ return true;
}
return false;
};
- OpChainList &Muls = R.getMuls();
+ MulCandList &Muls = R.getMuls();
const unsigned Elems = Muls.size();
SmallPtrSet<const Instruction*, 4> Paired;
for (unsigned i = 0; i < Elems; ++i) {
- BinOpChain *PMul0 = static_cast<BinOpChain*>(Muls[i].get());
+ MulCandidate *PMul0 = static_cast<MulCandidate*>(Muls[i].get());
if (Paired.count(PMul0->Root))
continue;
@@ -621,7 +598,7 @@ bool ARMParallelDSP::CreateParallelPairs
if (i == j)
continue;
- BinOpChain *PMul1 = static_cast<BinOpChain*>(Muls[j].get());
+ MulCandidate *PMul1 = static_cast<MulCandidate*>(Muls[j].get());
if (Paired.count(PMul1->Root))
continue;
@@ -682,8 +659,8 @@ void ARMParallelDSP::InsertParallelMACs(
LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n"
<< "Acc: " << *Acc << "\n");
for (auto &Pair : R.getMulPairs()) {
- BinOpChain *PMul0 = Pair.first;
- BinOpChain *PMul1 = Pair.second;
+ MulCandidate *PMul0 = Pair.first;
+ MulCandidate *PMul1 = Pair.second;
LLVM_DEBUG(dbgs() << "Muls:\n"
<< "- " << *PMul0->Root << "\n"
<< "- " << *PMul1->Root << "\n");
More information about the llvm-commits
mailing list