[lld] r248038 - COFF: Parallelize ICF.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 18 15:18:52 PDT 2015
On Fri, Sep 18, 2015 at 2:35 PM, David Blaikie <dblaikie at gmail.com> wrote:
>
>
> On Fri, Sep 18, 2015 at 2:06 PM, Rui Ueyama via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: ruiu
>> Date: Fri Sep 18 16:06:34 2015
>> New Revision: 248038
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=248038&view=rev
>> Log:
>> COFF: Parallelize ICF.
>>
>> LLD's ICF algorithm is highly parallelizable. This patch parallelizes it
>> using parallel_for_each.
>>
>> ICF accounted for about one third of the total execution time. Previously,
>> it took 324 ms when self-hosting. Now it takes only 62 ms.
>>
>> Of course, your mileage may vary. My machine is a beefy 24-core Xeon
>> machine, so you may not see this much speedup. But this optimization
>> should be effective even on a 2-core machine, since I saw a speedup
>> (324 ms -> 189 ms) when setting the parallelism parameter to 2.
>>
>> Modified:
>> lld/trunk/COFF/Chunks.h
>> lld/trunk/COFF/ICF.cpp
>>
>> Modified: lld/trunk/COFF/Chunks.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/Chunks.h?rev=248038&r1=248037&r2=248038&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/COFF/Chunks.h (original)
>> +++ lld/trunk/COFF/Chunks.h Fri Sep 18 16:06:34 2015
>> @@ -17,6 +17,7 @@
>> #include "llvm/ADT/iterator.h"
>> #include "llvm/ADT/iterator_range.h"
>> #include "llvm/Object/COFF.h"
>> +#include <atomic>
>> #include <map>
>> #include <vector>
>>
>> @@ -203,7 +204,7 @@ private:
>>
>> // Used for ICF (Identical COMDAT Folding)
>> void replaceWith(SectionChunk *Other);
>> - uint64_t GroupID;
>> + std::atomic<uint64_t> GroupID = { 0 };
>>
>> // Chunks are basically unnamed chunks of bytes.
>> // Symbols are associated for debugging and logging purposes only.
>>
>> Modified: lld/trunk/COFF/ICF.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/lld/trunk/COFF/ICF.cpp?rev=248038&r1=248037&r2=248038&view=diff
>>
>> ==============================================================================
>> --- lld/trunk/COFF/ICF.cpp (original)
>> +++ lld/trunk/COFF/ICF.cpp Fri Sep 18 16:06:34 2015
>> @@ -45,6 +45,7 @@
>>
>> #include "Chunks.h"
>> #include "Symbols.h"
>> +#include "lld/Core/Parallel.h"
>> #include "llvm/ADT/Hashing.h"
>> #include "llvm/Support/Debug.h"
>> #include "llvm/Support/raw_ostream.h"
>> @@ -56,6 +57,7 @@ using namespace llvm;
>> namespace lld {
>> namespace coff {
>>
>> +static const size_t NJOBS = 256;
>> typedef std::vector<SectionChunk *>::iterator ChunkIterator;
>> typedef bool (*Comparator)(const SectionChunk *, const SectionChunk *);
>>
>> @@ -72,7 +74,7 @@ private:
>> bool partition(ChunkIterator Begin, ChunkIterator End, Comparator Eq);
>>
>> const std::vector<Chunk *> &Chunks;
>> - uint64_t NextID = 0;
>> + uint64_t NextID = 1;
>> };
>>
>> // Entry point to ICF.
>> @@ -179,51 +181,74 @@ bool ICF::forEachGroup(std::vector<Secti
>> // contents and relocations are all the same.
>> void ICF::run() {
>> // Collect only mergeable sections and group by hash value.
>> - std::vector<SectionChunk *> SChunks;
>> - for (Chunk *C : Chunks) {
>> + std::vector<std::vector<SectionChunk *>> VChunks(NJOBS);
>>
>
> Could you move this variable down to the desired scope - after the
> parallel_for_each, just before the for-Chunks loop - then it'll be easier
> to tell there's no racy parallel use of it.
>
>
>> + parallel_for_each(Chunks.begin(), Chunks.end(), [&](Chunk *C) {
>> if (auto *SC = dyn_cast<SectionChunk>(C)) {
>> bool Writable = SC->getPermissions() &
>> llvm::COFF::IMAGE_SCN_MEM_WRITE;
>> - if (SC->isCOMDAT() && SC->isLive() && !Writable) {
>> - SChunks.push_back(SC);
>> + if (SC->isCOMDAT() && SC->isLive() && !Writable)
>> SC->GroupID = getHash(SC) | (uint64_t(1) << 63);
>> + }
>> + });
>> + for (Chunk *C : Chunks) {
>> + if (auto *SC = dyn_cast<SectionChunk>(C)) {
>> + if (SC->GroupID) {
>> + VChunks[SC->GroupID % NJOBS].push_back(SC);
>> } else {
>> SC->GroupID = NextID++;
>> }
>> }
>> }
>>
>> - // From now on, sections in SChunks are ordered so that sections in
>> + // From now on, sections in Chunks are ordered so that sections in
>> // the same group are consecutive in the vector.
>> - std::sort(SChunks.begin(), SChunks.end(),
>> - [](SectionChunk *A, SectionChunk *B) {
>> - return A->GroupID < B->GroupID;
>> - });
>> + parallel_for_each(VChunks.begin(), VChunks.end(),
>> + [&](std::vector<SectionChunk *> &SChunks) {
>> + std::sort(SChunks.begin(), SChunks.end(),
>> + [](SectionChunk *A, SectionChunk *B) {
>> + return A->GroupID < B->GroupID;
>>
>
> This lambda is in two places - perhaps it should be written in one place
> (maybe SectionChunk could have op<, then we could use a "deref" variadic
> functor that just does the dereferencing? I could use more words to
> describe that if it's interesting/non-obvious).
>
Actually, it looks like this lambda appears only once.
>
>
>> + });
>> + });
>>
>> // Split groups until we get a convergence.
>> int Cnt = 1;
>> - forEachGroup(SChunks, equalsConstant);
>> - while (forEachGroup(SChunks, equalsVariable))
>> + parallel_for_each(VChunks.begin(), VChunks.end(),
>> + [&](std::vector<SectionChunk *> &SChunks) {
>> + forEachGroup(SChunks, equalsConstant);
>> + });
>> +
>> + for (;;) {
>> + bool Redo = false;
>> + parallel_for_each(VChunks.begin(), VChunks.end(),
>> + [&](std::vector<SectionChunk *> &SChunks) {
>> + if (forEachGroup(SChunks, equalsVariable))
>> + Redo = true;
>>
>
> This looks like a race condition - multiple threads may be racing to write
> 'true' to Redo.
>
> (a parallel_find would be a nice abstraction to have, probably)
>
>
>> + });
>> + if (!Redo)
>> + break;
>> ++Cnt;
>> + }
>> if (Config->Verbose)
>> llvm::outs() << "\nICF needed " << Cnt << " iterations.\n";
>>
>> // Merge sections in the same group.
>> - for (auto It = SChunks.begin(), End = SChunks.end(); It != End;) {
>> - SectionChunk *Head = *It;
>> - auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
>> - return Head->GroupID != SC->GroupID;
>> - });
>> - if (std::distance(It, Bound) == 1) {
>
> - It = Bound;
>> - continue;
>> - }
>> - if (Config->Verbose)
>> - llvm::outs() << "Selected " << Head->getDebugName() << "\n";
>> - for (++It; It != Bound; ++It) {
>> - SectionChunk *SC = *It;
>> + for (std::vector<SectionChunk *> &SChunks : VChunks) {
>> + for (auto It = SChunks.begin(), End = SChunks.end(); It != End;) {
>> + SectionChunk *Head = *It;
>> + auto Bound = std::find_if(It + 1, End, [&](SectionChunk *SC) {
>> + return Head->GroupID != SC->GroupID;
>> + });
>> + if (std::distance(It, Bound) == 1) {
>>
>
> This could be written as "It + 1 == Bound" (or "std::next(It) == Bound")
> which may or may not be more legible, up to you. Possibly pull out the It +
> 1 in the find above, and reuse the value of that expression here.
>
> (std::distance is particularly useful for generic code, or when you're
> dealing with a non-random-access-iterator (one that only has
> increment/decrement, not arbitrary subtraction - it's perhaps more
> confusing than helpful in the common case like this))
>
>
>> + It = Bound;
>> + continue;
>> + }
>> if (Config->Verbose)
>> - llvm::outs() << " Removed " << SC->getDebugName() << "\n";
>> - SC->replaceWith(Head);
>> + llvm::outs() << "Selected " << Head->getDebugName() << "\n";
>> + for (++It; It != Bound; ++It) {
>> + SectionChunk *SC = *It;
>> + if (Config->Verbose)
>> + llvm::outs() << " Removed " << SC->getDebugName() << "\n";
>> + SC->replaceWith(Head);
>> + }
>> }
>> }
>> }
>>
>>
>>
>
>