[llvm] r367208 - [NFC][ARM[ParallelDSP] Cleanup of BinOpChain

Mon Jul 29 01:41:52 PDT 2019

Author: sam_parker
Date: Mon Jul 29 01:41:51 2019
New Revision: 367208

URL: http://llvm.org/viewvc/llvm-project?rev=367208&view=rev
Log:
[NFC][ARM[ParallelDSP] Cleanup of BinOpChain

- Remove some unused typedefs.
- Rename BinOpChain struct to MulCandidate.
- Remove the size method of MulCandidate.
- Store only the first input of the ValueList provided to
  MulCandidate, as it's the only value we care about. This means we
  don't have to perform any ugly (and unnecessary) iterations of the 
  list later on.

Modified:
    llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp?rev=367208&r1=367207&r2=367208&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp Mon Jul 29 01:41:51 2019
@@ -47,38 +47,32 @@ DisableParallelDSP("disable-arm-parallel
 
 namespace {
   struct OpChain;
-  struct BinOpChain;
+  struct MulCandidate;
   class Reduction;
 
-  using OpChainList     = SmallVector<std::unique_ptr<BinOpChain>, 8>;
+  using MulCandList     = SmallVector<std::unique_ptr<MulCandidate>, 8>;
   using ReductionList   = SmallVector<Reduction, 8>;
   using ValueList       = SmallVector<Value*, 8>;
   using MemInstList     = SmallVector<LoadInst*, 8>;
-  using PMACPair        = std::pair<BinOpChain*,BinOpChain*>;
+  using PMACPair        = std::pair<MulCandidate*,MulCandidate*>;
   using PMACPairList    = SmallVector<PMACPair, 8>;
-  using Instructions    = SmallVector<Instruction*,16>;
-  using MemLocList      = SmallVector<MemoryLocation, 4>;
 
-  // 'BinOpChain' holds the multiplication instructions that are candidates
+  // 'MulCandidate' holds the multiplication instructions that are candidates
   // for parallel execution.
-  struct BinOpChain {
+  struct MulCandidate {
     Instruction   *Root;
-    ValueList     AllValues;
-    MemInstList   VecLd;    // List of all load instructions.
-    ValueList     LHS;      // List of all (narrow) left hand operands.
-    ValueList     RHS;      // List of all (narrow) right hand operands.
+    MemInstList   VecLd;    // Container for loads to widen.
+    Value*        LHS;
+    Value*        RHS;
     bool          Exchange = false;
     bool          ReadOnly = true;
 
-    BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :
-      Root(I), LHS(lhs), RHS(rhs) {
-        for (auto *V : LHS)
-          AllValues.push_back(V);
-        for (auto *V : RHS)
-          AllValues.push_back(V);
-    }
+    MulCandidate(Instruction *I, ValueList &lhs, ValueList &rhs) :
+      Root(I), LHS(lhs.front()), RHS(rhs.front()) { }
 
-    unsigned size() const { return AllValues.size(); }
+    bool HasTwoLoadInputs() const {
+      return isa<LoadInst>(LHS) && isa<LoadInst>(RHS);
+    }
   };
 
   /// Represent a sequence of multiply-accumulate operations with the aim to
@@ -86,7 +80,7 @@ namespace {
   class Reduction {
     Instruction     *Root = nullptr;
     Value           *Acc = nullptr;
-    OpChainList     Muls;
+    MulCandList     Muls;
     PMACPairList        MulPairs;
     SmallPtrSet<Instruction*, 4> Adds;
 
@@ -98,10 +92,10 @@ namespace {
     /// Record an Add instruction that is a part of the this reduction.
     void InsertAdd(Instruction *I) { Adds.insert(I); }
 
-    /// Record a BinOpChain, rooted at a Mul instruction, that is a part of
+    /// Record a MulCandidate, rooted at a Mul instruction, that is a part of
     /// this reduction.
     void InsertMul(Instruction *I, ValueList &LHS, ValueList &RHS) {
-      Muls.push_back(make_unique<BinOpChain>(I, LHS, RHS));
+      Muls.push_back(make_unique<MulCandidate>(I, LHS, RHS));
     }
 
     /// Add the incoming accumulator value, returns true if a value had not
@@ -114,9 +108,9 @@ namespace {
       return true;
     }
 
-    /// Set two BinOpChains, rooted at muls, that can be executed as a single
+    /// Set two MulCandidates, rooted at muls, that can be executed as a single
     /// parallel operation.
-    void AddMulPair(BinOpChain *Mul0, BinOpChain *Mul1) {
+    void AddMulPair(MulCandidate *Mul0, MulCandidate *Mul1) {
       MulPairs.push_back(std::make_pair(Mul0, Mul1));
     }
 
@@ -133,11 +127,11 @@ namespace {
     /// Return the set of adds that comprise the reduction.
     SmallPtrSetImpl<Instruction*> &getAdds() { return Adds; }
 
-    /// Return the BinOpChain, rooted at mul instruction, that comprise the
+    /// Return the MulCandidate, rooted at mul instruction, that comprise the
     /// the reduction.
-    OpChainList &getMuls() { return Muls; }
+    MulCandList &getMuls() { return Muls; }
 
-    /// Return the BinOpChain, rooted at mul instructions, that have been
+    /// Return the MulCandidate, rooted at mul instructions, that have been
     /// paired for parallel execution.
     PMACPairList &getMulPairs() { return MulPairs; }
 
@@ -546,74 +540,57 @@ bool ARMParallelDSP::CreateParallelPairs
     return false;
 
   // Check that the muls operate directly upon sign extended loads.
-  for (auto &MulChain : R.getMuls()) {
-    // A mul has 2 operands, and a narrow op consist of sext and a load; thus
-    // we expect at least 4 items in this operand value list.
-    if (MulChain->size() < 4) {
-      LLVM_DEBUG(dbgs() << "Operand list too short.\n");
+  for (auto &MulCand : R.getMuls()) {
+    if (!MulCand->HasTwoLoadInputs())
       return false;
-    }
-    ValueList &LHS = static_cast<BinOpChain*>(MulChain.get())->LHS;
-    ValueList &RHS = static_cast<BinOpChain*>(MulChain.get())->RHS;
-
-    // Use +=2 to skip over the expected extend instructions.
-    for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {
-      if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))
-        return false;
-    }
   }
 
-  auto CanPair = [&](Reduction &R, BinOpChain *PMul0, BinOpChain *PMul1) {
+  auto CanPair = [&](Reduction &R, MulCandidate *PMul0, MulCandidate *PMul1) {
     // The first elements of each vector should be loads with sexts. If we
     // find that its two pairs of consecutive loads, then these can be
     // transformed into two wider loads and the users can be replaced with
     // DSP intrinsics.
-    for (unsigned x = 0; x < PMul0->LHS.size(); x += 2) {
-      auto *Ld0 = dyn_cast<LoadInst>(PMul0->LHS[x]);
-      auto *Ld1 = dyn_cast<LoadInst>(PMul1->LHS[x]);
-      auto *Ld2 = dyn_cast<LoadInst>(PMul0->RHS[x]);
-      auto *Ld3 = dyn_cast<LoadInst>(PMul1->RHS[x]);
-
-      if (!Ld0 || !Ld1 || !Ld2 || !Ld3)
-        return false;
-
-      LLVM_DEBUG(dbgs() << "Loads:\n"
-                 << " - " << *Ld0 << "\n"
-                 << " - " << *Ld1 << "\n"
-                 << " - " << *Ld2 << "\n"
-                 << " - " << *Ld3 << "\n");
-
-      if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
-        if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
-          LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-          R.AddMulPair(PMul0, PMul1);
-          return true;
-        } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
-          LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-          LLVM_DEBUG(dbgs() << "    exchanging Ld2 and Ld3\n");
-          PMul1->Exchange = true;
-          R.AddMulPair(PMul0, PMul1);
-          return true;
-        }
-      } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
-                 AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+    auto Ld0 = static_cast<LoadInst*>(PMul0->LHS);
+    auto Ld1 = static_cast<LoadInst*>(PMul1->LHS);
+    auto Ld2 = static_cast<LoadInst*>(PMul0->RHS);
+    auto Ld3 = static_cast<LoadInst*>(PMul1->RHS);
+
+    LLVM_DEBUG(dbgs() << "Loads:\n"
+               << " - " << *Ld0 << "\n"
+               << " - " << *Ld1 << "\n"
+               << " - " << *Ld2 << "\n"
+               << " - " << *Ld3 << "\n");
+
+    if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
+      if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
         LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
-        LLVM_DEBUG(dbgs() << "    exchanging Ld0 and Ld1\n");
-        LLVM_DEBUG(dbgs() << "    and swapping muls\n");
-        PMul0->Exchange = true;
-        // Only the second operand can be exchanged, so swap the muls.
-        R.AddMulPair(PMul1, PMul0);
+        R.AddMulPair(PMul0, PMul1);
+        return true;
+      } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
+        LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+        LLVM_DEBUG(dbgs() << "    exchanging Ld2 and Ld3\n");
+        PMul1->Exchange = true;
+        R.AddMulPair(PMul0, PMul1);
         return true;
       }
+    } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
+               AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
+      LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");
+      LLVM_DEBUG(dbgs() << "    exchanging Ld0 and Ld1\n");
+      LLVM_DEBUG(dbgs() << "    and swapping muls\n");
+      PMul0->Exchange = true;
+      // Only the second operand can be exchanged, so swap the muls.
+      R.AddMulPair(PMul1, PMul0);
+      return true;
     }
     return false;
   };
 
-  OpChainList &Muls = R.getMuls();
+  MulCandList &Muls = R.getMuls();
   const unsigned Elems = Muls.size();
   SmallPtrSet<const Instruction*, 4> Paired;
   for (unsigned i = 0; i < Elems; ++i) {
-    BinOpChain *PMul0 = static_cast<BinOpChain*>(Muls[i].get());
+    MulCandidate *PMul0 = static_cast<MulCandidate*>(Muls[i].get());
     if (Paired.count(PMul0->Root))
       continue;
 
@@ -621,7 +598,7 @@ bool ARMParallelDSP::CreateParallelPairs
       if (i == j)
         continue;
 
-      BinOpChain *PMul1 = static_cast<BinOpChain*>(Muls[j].get());
+      MulCandidate *PMul1 = static_cast<MulCandidate*>(Muls[j].get());
       if (Paired.count(PMul1->Root))
         continue;
 
@@ -682,8 +659,8 @@ void ARMParallelDSP::InsertParallelMACs(
   LLVM_DEBUG(dbgs() << "Root: " << *InsertAfter << "\n"
              << "Acc: " << *Acc << "\n");
   for (auto &Pair : R.getMulPairs()) {
-    BinOpChain *PMul0 = Pair.first;
-    BinOpChain *PMul1 = Pair.second;
+    MulCandidate *PMul0 = Pair.first;
+    MulCandidate *PMul1 = Pair.second;
     LLVM_DEBUG(dbgs() << "Muls:\n"
                << "- " << *PMul0->Root << "\n"
                << "- " << *PMul1->Root << "\n");