<div dir="ltr">This revision introduced a memory leak:  <a href="http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21273/steps/check-llvm%20asan/logs/stdio">http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/21273/steps/check-llvm%20asan/logs/stdio</a>.  Please take a look.<div><br></div><div><pre style="font-family:'Courier New',courier,monotype,monospace;color:rgb(0,0,0);font-size:medium;text-decoration-style:initial;text-decoration-color:initial"><span class="gmail-stdout" style="font-family:'Courier New',courier,monotype,monospace;color:black">==11220==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 1024 byte(s) in 2 object(s) allocated from:

    #0 0xa4d088 in operator new(unsigned long) /b/sanitizer-x86_64-linux-fast/build/llvm/projects/compiler-rt/lib/asan/asan_new_delete.cc:92

    #1 0x19068f7 in AddMACCandidate(llvm::SmallVector<(anonymous namespace)::OpChain*, 8u>&, llvm::Instruction const*, llvm::Value*, llvm::Value*, int) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/ARM/ARMParallelDSP.cpp:458:26

    #2 0x19000da in MatchParallelMACSequences /b/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/ARM/ARMParallelDSP.cpp:471:5

    #3 0x19000da in (anonymous namespace)::ARMParallelDSP::MatchSMLAD(llvm::Function&) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/ARM/ARMParallelDSP.cpp:602

    #4 0x18fddb7 in (anonymous namespace)::ARMParallelDSP::runOnLoop(llvm::Loop*, llvm::LPPassManager&) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/Target/ARM/ARMParallelDSP.cpp:195:17

    #5 0x3687f59 in llvm::LPPassManager::runOnFunction(llvm::Function&) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/Analysis/LoopPass.cpp:217:23

    #6 0x47f755d in llvm::FPPassManager::runOnFunction(llvm::Function&) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1586:27

    #7 0x47f7b72 in llvm::FPPassManager::runOnModule(llvm::Module&) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1609:16

    #8 0x47f88e8 in runOnModule /b/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1669:27

    #9 0x47f88e8 in llvm::legacy::PassManagerImpl::run(llvm::Module&) /b/sanitizer-x86_64-linux-fast/build/llvm/lib/IR/LegacyPassManager.cpp:1774

    #10 0xa9286e in main /b/sanitizer-x86_64-linux-fast/build/llvm/tools/opt/opt.cpp:810:12

    #11 0x7f53d46c62e0 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x202e0)

SUMMARY: AddressSanitizer: 1024 byte(s) leaked in 2 allocation(s).

</span></pre><br class="gmail-Apple-interchange-newline"><br><div><br></div><div><br></div></div></div><br><div class="gmail_quote"><div dir="ltr">On Mon, Jul 23, 2018 at 8:26 AM Sam Parker via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: sam_parker<br>

Date: Mon Jul 23 08:25:59 2018<br>

New Revision: 337701<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=337701&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=337701&view=rev</a><br>

Log:<br>

[ARM][NFC] ParallelDSP reorganisation<br>

<br>

In preparing to allow ARMParallelDSP pass to parallelise more than<br>

smlads, I've restructured some elements:<br>

<br>

- The ParallelMAC struct has been renamed to BinOpChain.<br>

- The BinOpChain struct holds two value lists: LHS and RHS, as well<br>

  as inheriting from the OpChain base class.<br>

- The OpChain struct holds all the values of the represented chain<br>

  and has had the memory locations functionality inserted into it.<br>

- ParallelMACList becomes OpChainList and it now holds pointers<br>

  instead of objects.<br>

<br>

Differential Revision: <a href="https://reviews.llvm.org/D49020" rel="noreferrer" target="_blank">https://reviews.llvm.org/D49020</a><br>

<br>

Modified:<br>

    llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp<br>

<br>

Modified: llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp?rev=337701&r1=337700&r2=337701&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp?rev=337701&r1=337700&r2=337701&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp (original)<br>

+++ llvm/trunk/lib/Target/ARM/ARMParallelDSP.cpp Mon Jul 23 08:25:59 2018<br>

@@ -43,38 +43,56 @@ using namespace PatternMatch;<br>

 STATISTIC(NumSMLAD , "Number of smlad instructions generated");<br>

<br>

 namespace {<br>

-  struct ParallelMAC;<br>

+  struct OpChain;<br>

+  struct BinOpChain;<br>

   struct Reduction;<br>

<br>

-  using ParallelMACList = SmallVector<ParallelMAC, 8>;<br>

+  using OpChainList     = SmallVector<OpChain*, 8>;<br>

   using ReductionList   = SmallVector<Reduction, 8>;<br>

   using ValueList       = SmallVector<Value*, 8>;<br>

   using MemInstList     = SmallVector<Instruction*, 8>;<br>

-  using PMACPair        = std::pair<ParallelMAC*,ParallelMAC*>;<br>

+  using PMACPair        = std::pair<BinOpChain*,BinOpChain*>;<br>

   using PMACPairList    = SmallVector<PMACPair, 8>;<br>

   using Instructions    = SmallVector<Instruction*,16>;<br>

   using MemLocList      = SmallVector<MemoryLocation, 4>;<br>

<br>

-  // 'ParallelMAC' and 'Reduction' are just some bookkeeping data structures.<br>

+  struct OpChain {<br>

+    Instruction   *Root;<br>

+    ValueList     AllValues;<br>

+    MemInstList   VecLd;    // List of all load instructions.<br>

+    MemLocList    MemLocs;  // All memory locations read by this tree.<br>

+    bool          ReadOnly = true;<br>

+<br>

+    OpChain(Instruction *I, ValueList &vl) : Root(I), AllValues(vl) { }<br>

+<br>

+    void SetMemoryLocations() {<br>

+      const auto Size = MemoryLocation::UnknownSize;<br>

+      for (auto *V : AllValues) {<br>

+        if (auto *I = dyn_cast<Instruction>(V)) {<br>

+          if (I->mayWriteToMemory())<br>

+            ReadOnly = false;<br>

+          if (auto *Ld = dyn_cast<LoadInst>(V))<br>

+            MemLocs.push_back(MemoryLocation(Ld->getPointerOperand(), Size));<br>

+        }<br>

+      }<br>

+    }<br>

+<br>

+    unsigned size() const { return AllValues.size(); }<br>

+  };<br>

+<br>

+  // 'BinOpChain' and 'Reduction' are just some bookkeeping data structures.<br>

   // 'Reduction' contains the phi-node and accumulator statement from where we<br>

-  // start pattern matching, and 'ParallelMAC' the multiplication<br>

+  // start pattern matching, and 'BinOpChain' the multiplication<br>

   // instructions that are candidates for parallel execution.<br>

-  struct ParallelMAC {<br>

-    Instruction *Mul;<br>

-    ValueList    VL;        // List of all (narrow) operands of this Mul<br>

-    MemInstList  VecLd;     // List of all load instructions of this Mul<br>

-    MemLocList   MemLocs;   // All memory locations read by this Mul<br>

-<br>

-    // The MAC-chains we currently recognise are simple chains that accumulate<br>

-    // their results with a reducing integer add statement, and consist of<br>

-    // a chain of adds and muls, which have only sext and load instructions as<br>

-    // operands. Thus, these chains don't write memory. We check that this is<br>

-    // true when we collect the operands, and use this in alias analysis checks<br>

-    // that different parallel MACs don't interfere with each other.<br>

-    bool ReadOnly;<br>

-<br>

-    ParallelMAC(Instruction *I, ValueList &V, bool RdOnly)<br>

-      : Mul(I), VL(V), ReadOnly(RdOnly) {};<br>

+  struct BinOpChain : public OpChain {<br>

+    ValueList     LHS;      // List of all (narrow) left hand operands.<br>

+    ValueList     RHS;      // List of all (narrow) right hand operands.<br>

+<br>

+    BinOpChain(Instruction *I, ValueList &lhs, ValueList &rhs) :<br>

+      OpChain(I, lhs), LHS(lhs), RHS(rhs) {<br>

+        for (auto *V : RHS)<br>

+          AllValues.push_back(V);<br>

+      }<br>

   };<br>

<br>

   struct Reduction {<br>

@@ -83,7 +101,7 @@ namespace {<br>

     Instruction     *AccIntAdd;       // The accumulating integer add statement,<br>

                                       // i.e, the reduction statement.<br>

<br>

-    ParallelMACList MACCandidates;    // The MAC candidates associated with<br>

+    OpChainList     MACCandidates;    // The MAC candidates associated with<br>

                                       // this reduction statement.<br>

     Reduction (PHINode *P, Instruction *Acc) : Phi(P), AccIntAdd(Acc) { };<br>

   };<br>

@@ -100,7 +118,7 @@ namespace {<br>

<br>

     bool InsertParallelMACs(Reduction &Reduction, PMACPairList &PMACPairs);<br>

     bool AreSequentialLoads(LoadInst *Ld0, LoadInst *Ld1, MemInstList &VecMem);<br>

-    PMACPairList CreateParallelMACPairs(ParallelMACList &Candidates);<br>

+    PMACPairList CreateParallelMACPairs(OpChainList &Candidates);<br>

     Instruction *CreateSMLADCall(LoadInst *VecLd0, LoadInst *VecLd1,<br>

                                  Instruction *Acc, Instruction *InsertAfter);<br>

<br>

@@ -303,7 +321,7 @@ bool ARMParallelDSP::AreSequentialLoads(<br>

 }<br>

<br>

 PMACPairList<br>

-ARMParallelDSP::CreateParallelMACPairs(ParallelMACList &Candidates) {<br>

+ARMParallelDSP::CreateParallelMACPairs(OpChainList &Candidates) {<br>

   const unsigned Elems = Candidates.size();<br>

   PMACPairList PMACPairs;<br>

<br>

@@ -314,10 +332,10 @@ ARMParallelDSP::CreateParallelMACPairs(P<br>

   // We can compare all elements, but then we need to compare and evaluate<br>

   // different solutions.<br>

   for(unsigned i=0; i<Elems-1; i+=2) {<br>

-    ParallelMAC &PMul0 = Candidates[i];<br>

-    ParallelMAC &PMul1 = Candidates[i+1];<br>

-    const Instruction *Mul0 = PMul0.Mul;<br>

-    const Instruction *Mul1 = PMul1.Mul;<br>

+    BinOpChain *PMul0 = static_cast<BinOpChain*>(Candidates[i]);<br>

+    BinOpChain *PMul1 = static_cast<BinOpChain*>(Candidates[i+1]);<br>

+    const Instruction *Mul0 = PMul0->Root;<br>

+    const Instruction *Mul1 = PMul1->Root;<br>

<br>

     if (Mul0 == Mul1)<br>

       continue;<br>

@@ -326,10 +344,13 @@ ARMParallelDSP::CreateParallelMACPairs(P<br>

                dbgs() << "- "; Mul0->dump();<br>

                dbgs() << "- "; Mul1->dump());<br>

<br>

-    const ValueList &VL0 = PMul0.VL;<br>

-    const ValueList &VL1 = PMul1.VL;<br>

+    const ValueList &Mul0_LHS = PMul0->LHS;<br>

+    const ValueList &Mul0_RHS = PMul0->RHS;<br>

+    const ValueList &Mul1_LHS = PMul1->LHS;<br>

+    const ValueList &Mul1_RHS = PMul1->RHS;<br>

<br>

-    if (!AreSymmetrical(VL0, VL1))<br>

+    if (!AreSymmetrical(Mul0_LHS, Mul1_LHS) ||<br>

+        !AreSymmetrical(Mul0_RHS, Mul1_RHS))<br>

       continue;<br>

<br>

     LLVM_DEBUG(dbgs() << "OK: mul operands list match:\n");<br>

@@ -337,23 +358,23 @@ ARMParallelDSP::CreateParallelMACPairs(P<br>

     // that its two pairs of consecutive loads, then these can be transformed<br>

     // into two wider loads and the users can be replaced with DSP<br>

     // intrinsics.<br>

-    for (unsigned x = 0; x < VL0.size(); x += 4) {<br>

-      auto *Ld0 = dyn_cast<LoadInst>(VL0[x]);<br>

-      auto *Ld1 = dyn_cast<LoadInst>(VL1[x]);<br>

-      auto *Ld2 = dyn_cast<LoadInst>(VL0[x+2]);<br>

-      auto *Ld3 = dyn_cast<LoadInst>(VL1[x+2]);<br>

+    for (unsigned x = 0; x < Mul0_LHS.size(); x += 2) {<br>

+      auto *Ld0 = dyn_cast<LoadInst>(Mul0_LHS[x]);<br>

+      auto *Ld1 = dyn_cast<LoadInst>(Mul1_LHS[x]);<br>

+      auto *Ld2 = dyn_cast<LoadInst>(Mul0_RHS[x]);<br>

+      auto *Ld3 = dyn_cast<LoadInst>(Mul1_RHS[x]);<br>

<br>

       LLVM_DEBUG(dbgs() << "Looking at operands " << x << ":\n";<br>

-                 dbgs() << "\t mul1: "; VL0[x]->dump();<br>

-                 dbgs() << "\t mul2: "; VL1[x]->dump();<br>

+                 dbgs() << "\t mul1: "; Mul0_LHS[x]->dump();<br>

+                 dbgs() << "\t mul2: "; Mul1_LHS[x]->dump();<br>

                  dbgs() << "and operands " << x + 2 << ":\n";<br>

-                 dbgs() << "\t mul1: "; VL0[x+2]->dump();<br>

-                 dbgs() << "\t mul2: "; VL1[x+2]->dump());<br>

+                 dbgs() << "\t mul1: "; Mul0_RHS[x]->dump();<br>

+                 dbgs() << "\t mul2: "; Mul1_RHS[x]->dump());<br>

<br>

-      if (AreSequentialLoads(Ld0, Ld1, Candidates[i].VecLd) &&<br>

-          AreSequentialLoads(Ld2, Ld3, Candidates[i+1].VecLd)) {<br>

+      if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd) &&<br>

+          AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {<br>

         LLVM_DEBUG(dbgs() << "OK: found two pairs of parallel loads!\n");<br>

-        PMACPairs.push_back(std::make_pair(&PMul0, &PMul1));<br>

+        PMACPairs.push_back(std::make_pair(PMul0, PMul1));<br>

       }<br>

     }<br>

   }<br>

@@ -367,8 +388,8 @@ bool ARMParallelDSP::InsertParallelMACs(<br>

<br>

   for (auto &Pair : PMACPairs) {<br>

     LLVM_DEBUG(dbgs() << "Found parallel MACs!!\n";<br>

-               dbgs() << "- "; Pair.first->Mul->dump();<br>

-               dbgs() << "- "; Pair.second->Mul->dump());<br>

+               dbgs() << "- "; Pair.first->Root->dump();<br>

+               dbgs() << "- "; Pair.second->Root->dump());<br>

     auto *VecLd0 = cast<LoadInst>(Pair.first->VecLd[0]);<br>

     auto *VecLd1 = cast<LoadInst>(Pair.second->VecLd[0]);<br>

     Acc = CreateSMLADCall(VecLd0, VecLd1, Acc, InsertAfter);<br>

@@ -383,9 +404,8 @@ bool ARMParallelDSP::InsertParallelMACs(<br>

   return false;<br>

 }<br>

<br>

-static ReductionList MatchReductions(Function &F, Loop *TheLoop,<br>

-                                     BasicBlock *Header) {<br>

-  ReductionList Reductions;<br>

+static void MatchReductions(Function &F, Loop *TheLoop, BasicBlock *Header,<br>

+                            ReductionList &Reductions) {<br>

   RecurrenceDescriptor RecDesc;<br>

   const bool HasFnNoNaNAttr =<br>

     F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";<br>

@@ -394,7 +414,7 @@ static ReductionList MatchReductions(Fun<br>

   // We need a preheader as getIncomingValueForBlock assumes there is one.<br>

   if (!TheLoop->getLoopPreheader()) {<br>

     LLVM_DEBUG(dbgs() << "No preheader found, bailing out\n");<br>

-    return Reductions;<br>

+    return;<br>

   }<br>

<br>

   for (PHINode &Phi : Header->phis()) {<br>

@@ -418,36 +438,29 @@ static ReductionList MatchReductions(Fun<br>

<br>

   LLVM_DEBUG(<br>

     dbgs() << "\nAccumulating integer additions (reductions) found:\n";<br>

-    for (auto R : Reductions) {<br>

+    for (auto &R : Reductions) {<br>

       dbgs() << "-  "; R.Phi->dump();<br>

       dbgs() << "-> "; R.AccIntAdd->dump();<br>

     }<br>

   );<br>

-  return Reductions;<br>

 }<br>

<br>

-static void AddMACCandidate(ParallelMACList &Candidates, const Instruction *Acc,<br>

+static void AddMACCandidate(OpChainList &Candidates,<br>

+                            const Instruction *Acc,<br>

                             Value *MulOp0, Value *MulOp1, int MulOpNum) {<br>

   Instruction *Mul = dyn_cast<Instruction>(Acc->getOperand(MulOpNum));<br>

   LLVM_DEBUG(dbgs() << "OK, found acc mul:\t"; Mul->dump());<br>

-  ValueList VL;<br>

-  if (IsNarrowSequence<16>(MulOp0, VL) &&<br>

-      IsNarrowSequence<16>(MulOp1, VL)) {<br>

+  ValueList LHS;<br>

+  ValueList RHS;<br>

+  if (IsNarrowSequence<16>(MulOp0, LHS) &&<br>

+      IsNarrowSequence<16>(MulOp1, RHS)) {<br>

     LLVM_DEBUG(dbgs() << "OK, found narrow mul: "; Mul->dump());<br>

-<br>

-    bool MayWriteMem = false;<br>

-    for (auto &V : VL) {<br>

-      if (dyn_cast<Instruction>(V)->mayWriteToMemory()) {<br>

-        MayWriteMem = true;<br>

-        break;<br>

-      }<br>

-    }<br>

-    Candidates.push_back(ParallelMAC(Mul, VL, !MayWriteMem));<br>

+    Candidates.push_back(new BinOpChain(Mul, LHS, RHS));<br>

   }<br>

 }<br>

<br>

-static ParallelMACList MatchParallelMACs(Reduction &R) {<br>

-  ParallelMACList Candidates;<br>

+static void MatchParallelMACSequences(Reduction &R,<br>

+                                      OpChainList &Candidates) {<br>

   const Instruction *Acc = R.AccIntAdd;<br>

   Value *A, *MulOp0, *MulOp1;<br>

   LLVM_DEBUG(dbgs() << "\n- Analysing:\t"; Acc->dump());<br>

@@ -473,7 +486,6 @@ static ParallelMACList MatchParallelMACs<br>

   // Because we start at the bottom of the chain, and we work our way up,<br>

   // the muls are added in reverse program order to the list.<br>

   std::reverse(Candidates.begin(), Candidates.end());<br>

-  return Candidates;<br>

 }<br>

<br>

 // Collects all instructions that are not part of the MAC chains, which is the<br>

@@ -492,23 +504,23 @@ static void AliasCandidates(BasicBlock *<br>

 // the memory locations accessed by the MAC-chains.<br>

 // TODO: we need the read statements when we accept more complicated chains.<br>

 static bool AreAliased(AliasAnalysis *AA, Instructions &Reads,<br>

-                       Instructions &Writes, ParallelMACList &MACCandidates) {<br>

+                       Instructions &Writes, OpChainList &MACCandidates) {<br>

   LLVM_DEBUG(dbgs() << "Alias checks:\n");<br>

-  for (auto &MAC : MACCandidates) {<br>

-    LLVM_DEBUG(dbgs() << "mul: "; MAC.Mul->dump());<br>

+  for (auto *MAC : MACCandidates) {<br>

+    LLVM_DEBUG(dbgs() << "mul: "; MAC->Root->dump());<br>

<br>

     // At the moment, we allow only simple chains that only consist of reads,<br>

     // accumulate their result with an integer add, and thus that don't write<br>

     // memory, and simply bail if they do.<br>

-    if (!MAC.ReadOnly)<br>

+    if (!MAC->ReadOnly)<br>

       return true;<br>

<br>

     // Now for all writes in the basic block, check that they don't alias with<br>

     // the memory locations accessed by our MAC-chain:<br>

     for (auto *I : Writes) {<br>

       LLVM_DEBUG(dbgs() << "- "; I->dump());<br>

-      assert(MAC.MemLocs.size() >= 2 && "expecting at least 2 memlocs");<br>

-      for (auto &MemLoc : MAC.MemLocs) {<br>

+      assert(MAC->MemLocs.size() >= 2 && "expecting at least 2 memlocs");<br>

+      for (auto &MemLoc : MAC->MemLocs) {<br>

         if (isModOrRefSet(intersectModRef(AA->getModRefInfo(I, MemLoc),<br>

                                           ModRefInfo::ModRef))) {<br>

           LLVM_DEBUG(dbgs() << "Yes, aliases found\n");<br>

@@ -522,24 +534,22 @@ static bool AreAliased(AliasAnalysis *AA<br>

   return false;<br>

 }<br>

<br>

-static bool SetMemoryLocations(ParallelMACList &Candidates) {<br>

-  const auto Size = MemoryLocation::UnknownSize;<br>

-  for (auto &C : Candidates) {<br>

+static bool CheckMACMemory(OpChainList &Candidates) {<br>

+  for (auto *C : Candidates) {<br>

     // A mul has 2 operands, and a narrow op consist of sext and a load; thus<br>

     // we expect at least 4 items in this operand value list.<br>

-    if (C.VL.size() < 4) {<br>

+    if (C->size() < 4) {<br>

       LLVM_DEBUG(dbgs() << "Operand list too short.\n");<br>

       return false;<br>

     }<br>

-<br>

-    for (unsigned i = 0; i < C.VL.size(); i += 4) {<br>

-      auto *LdOp0 = dyn_cast<LoadInst>(C.VL[i]);<br>

-      auto *LdOp1 = dyn_cast<LoadInst>(C.VL[i+2]);<br>

-      if (!LdOp0 || !LdOp1)<br>

+    C->SetMemoryLocations();<br>

+    ValueList &LHS = static_cast<BinOpChain*>(C)->LHS;<br>

+    ValueList &RHS = static_cast<BinOpChain*>(C)->RHS;<br>

+<br>

+    // Use +=2 to skip over the expected extend instructions.<br>

+    for (unsigned i = 0, e = LHS.size(); i < e; i += 2) {<br>

+      if (!isa<LoadInst>(LHS[i]) || !isa<LoadInst>(RHS[i]))<br>

         return false;<br>

-<br>

-      C.MemLocs.push_back(MemoryLocation(LdOp0->getPointerOperand(), Size));<br>

-      C.MemLocs.push_back(MemoryLocation(LdOp1->getPointerOperand(), Size));<br>

     }<br>

   }<br>

   return true;<br>

@@ -584,17 +594,20 @@ bool ARMParallelDSP::MatchSMLAD(Function<br>

              dbgs() << "Loop info:\n\n"; L->dump());<br>

<br>

   bool Changed = false;<br>

-  ReductionList Reductions = MatchReductions(F, L, Header);<br>

+  ReductionList Reductions;<br>

+  MatchReductions(F, L, Header, Reductions);<br>

<br>

   for (auto &R : Reductions) {<br>

-    ParallelMACList MACCandidates = MatchParallelMACs(R);<br>

-    if (!SetMemoryLocations(MACCandidates))<br>

+    OpChainList MACCandidates;<br>

+    MatchParallelMACSequences(R, MACCandidates);<br>

+    if (!CheckMACMemory(MACCandidates))<br>

       continue;<br>

+<br>

     R.MACCandidates = MACCandidates;<br>

<br>

     LLVM_DEBUG(dbgs() << "MAC candidates:\n";<br>

       for (auto &M : R.MACCandidates)<br>

-        M.Mul->dump();<br>

+        M->Root->dump();<br>

       dbgs() << "\n";);<br>

   }<br>

<br>

@@ -609,6 +622,8 @@ bool ARMParallelDSP::MatchSMLAD(Function<br>

       return false;<br>

     PMACPairList PMACPairs = CreateParallelMACPairs(R.MACCandidates);<br>

     Changed |= InsertParallelMACs(R, PMACPairs);<br>

+    for (auto *C : R.MACCandidates)<br>

+      delete C;<br>

   }<br>

<br>

   LLVM_DEBUG(if (Changed) dbgs() << "Header block:\n"; Header->dump(););<br>

<br>

<br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>

<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>

</blockquote></div>