[llvm] r256667 - [PGO]: Implement Func PGO name string compression

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 4 14:14:24 PST 2016


great comments. I have addresses all of them.

thanks,

David


On Mon, Jan 4, 2016 at 10:28 AM, Vedant Kumar <vsk at apple.com> wrote:
> Hi David,
>
> I have a few comments on this (inline).
>
>> +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
>> +                              bool doCompression, std::string &Result) {
>> +  uint8_t Header[16], *P = Header;
>> +  std::string UncompressedNameStrings;
>> +
>> +  for (auto NameStr : NameStrs) {
>> +    UncompressedNameStrings += NameStr;
>> +    UncompressedNameStrings.append(" ");
>> +  }
>
> The complexity of this loop is quadratic in the average name length.
>
> Wdyt of (1) summing the name lengths, (2) allocating a char buffer,
> and (3) copying in the NameStrs manually?
>
>
>> +  unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
>> +  P += EncLen;
>> +  if (!doCompression) {
>> +    EncLen = encodeULEB128(0, P);
>> +    P += EncLen;
>> +    Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
> This last line is tricky to read. See my comment below for an alternative.
>
>
>> +    Result += UncompressedNameStrings;
>> +    return 0;
>> +  }
>> +  SmallVector<char, 128> CompressedNameStrings;
>> +  zlib::Status Success =
>> +      zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
>> +                     zlib::BestSizeCompression);
>> +  assert(Success == zlib::StatusOK);
>
> This assert is fine for debugging but I don't think it belongs in-tree.
>
>
>> +  if (Success != zlib::StatusOK)
>> +    return 1;
>> +  EncLen = encodeULEB128(CompressedNameStrings.size(), P);
>> +  P += EncLen;
>> +  Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
> Since this line is needed twice, why not lift it into a lambda like:
>
> auto AppendLengthsToResult = [&] {
>   // Store the uncompressed and compressed name string lengths in \p Result.
>   char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
>   unsigned HeaderLen = P - &Header[0];
>   Result.append(HeaderStr, HeaderLen);
> };
>
>
>> +  Result +=
>> +      std::string(CompressedNameStrings.data(), CompressedNameStrings.size());
>> +  return 0;
>> +}
>> +
>> +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
>> +                              std::string &Result) {
>> +  std::vector<std::string> NameStrs;
>> +  for (auto *NameVar : NameVars) {
>> +    auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
>> +    StringRef NameStr =
>> +        Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
>> +    NameStrs.push_back(NameStr.str());
>> +  }
>> +  return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
>> +}
>> +
>> +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
>> +  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
>> +  const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
>> +                                                          NameStrings.size());
>> +  while (P < EndP) {
>> +    uint32_t N;
>> +    uint64_t UncompressedSize = decodeULEB128(P, &N);
>> +    P += N;
>> +    uint64_t CompressedSize = decodeULEB128(P, &N);
>> +    P += N;
>> +    bool isCompressed = (CompressedSize != 0);
>> +    SmallString<128> UncompressedNameStrings;
>> +    StringRef NameStrings;
>> +    if (isCompressed) {
>> +      StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
>> +                                      CompressedSize);
>> +      if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
>> +                           UncompressedSize) != zlib::StatusOK)
>> +        return 1;
>> +      P += CompressedSize;
>> +      NameStrings = StringRef(UncompressedNameStrings.data(),
>> +                              UncompressedNameStrings.size());
>> +    } else {
>> +      NameStrings =
>> +          StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
>> +      P += UncompressedSize;
>> +    }
>
> --
>
>> +    // Now parse the name strings.
>> +    size_t NameStart = 0;
>> +    bool isLast = false;
>> +    do {
>> +      size_t NameStop = NameStrings.find(' ', NameStart);
>> +      if (NameStop == StringRef::npos)
>> +        return 1;
>> +      if (NameStop == NameStrings.size() - 1)
>> +        isLast = true;
>> +      StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
>> +      Symtab.addFuncName(Name);
>> +      if (isLast)
>> +        break;
>> +      NameStart = NameStop + 1;
>> +    } while (true);
>
> --
>
> It would be simpler to do this:
>
> llvm::SmallVector<StringRef, 0> Names;
> NameStrings.split(Names, ' ');
> for (StringRef &Name : Names) Symtab.addFuncName(Name);
>
>
>> +
>> +    while (P < EndP && *P == 0)
>> +      P++;
>> +  }
>> +  Symtab.finalizeSymtab();
>> +  return 0;
>> +}
>> +
>> instrprof_error
>> InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
>>                                          uint64_t Weight) {
>>
>> Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=256667&r1=256666&r2=256667&view=diff
>> ==============================================================================
>> --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
>> +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Thu Dec 31 01:57:16 2015
>> @@ -9,6 +9,7 @@
>>
>> #include "llvm/ProfileData/InstrProfReader.h"
>> #include "llvm/ProfileData/InstrProfWriter.h"
>> +#include "llvm/Support/Compression.h"
>> #include "gtest/gtest.h"
>>
>> #include <cstdarg>
>> @@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_
>>   ASSERT_EQ(StringRef("bar3"), R);
>> }
>>
>> +TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
>> +  std::vector<std::string> FuncNames1;
>> +  std::vector<std::string> FuncNames2;
>> +  for (int I = 0; I < 10 * 1024; I++) {
>> +    std::string str;
>> +    raw_string_ostream OS(str);
>> +    OS << "func_" << I;
>> +    FuncNames1.push_back(OS.str());
>> +    str.clear();
>> +    OS << "fooooooooooooooo_" << I;
>> +    FuncNames1.push_back(OS.str());
>> +    str.clear();
>> +    OS << "BAR_" << I;
>> +    FuncNames2.push_back(OS.str());
>> +    str.clear();
>> +    OS << "BlahblahBlahblahBar_" << I;
>> +    FuncNames2.push_back(OS.str());
>> +  }
>> +
>> +  for (int Padding = 0; Padding < 10; Padding++) {
>> +    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
>> +      // Compressing:
>> +      std::string FuncNameStrings1;
>> +      collectPGOFuncNameStrings(FuncNames1,
>> +                                (DoCompression != 0 && zlib::isAvailable()),
>> +                                FuncNameStrings1);
>> +
>> +      // Compressing:
>> +      std::string FuncNameStrings2;
>> +      collectPGOFuncNameStrings(FuncNames2,
>> +                                (DoCompression != 0 && zlib::isAvailable()),
>> +                                FuncNameStrings2);
>> +
>> +      // Join with paddings:
>> +      std::string FuncNameStrings = FuncNameStrings1;
>> +      for (int P = 0; P < Padding; P++) {
>> +        FuncNameStrings.push_back('\0');
>> +      }
>> +      FuncNameStrings += FuncNameStrings2;
>> +
>> +      // Now decompress
>> +      InstrProfSymtab Symtab;
>> +      Symtab.create(StringRef(FuncNameStrings));
>> +
>> +      // Now check
>> +      for (int I = 0; I < 10 * 1024; I++) {
>> +        std::string N[4];
>> +        N[0] = FuncNames1[2 * I];
>> +        N[1] = FuncNames1[2 * I + 1];
>> +        N[2] = FuncNames2[2 * I];
>> +        N[3] = FuncNames2[2 * I + 1];
>> +        for (int J = 0; J < 4; J++) {
>> +          StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
>> +          ASSERT_EQ(StringRef(N[J]), R);
>> +        }
>> +      }
>> +    }
>> +  }
>> +}
>> +
>> } // end anonymous namespace
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
> thanks
> vedant
>


More information about the llvm-commits mailing list