[llvm] r256667 - [PGO]: Implement Func PGO name string compression

Vedant Kumar via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 4 10:28:46 PST 2016


Hi David,

I have a few comments on this (inline).

> +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
> +                              bool doCompression, std::string &Result) {
> +  uint8_t Header[16], *P = Header;
> +  std::string UncompressedNameStrings;
> +
> +  for (auto NameStr : NameStrs) {
> +    UncompressedNameStrings += NameStr;
> +    UncompressedNameStrings.append(" ");
> +  }

The complexity of this loop is quadratic in the average name length.

Wdyt of (1) summing the name lengths, (2) allocating a char buffer,
and (3) copying in the NameStrs manually?


> +  unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
> +  P += EncLen;
> +  if (!doCompression) {
> +    EncLen = encodeULEB128(0, P);
> +    P += EncLen;
> +    Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);

This last line is tricky to read. See my comment below for an alternative.


> +    Result += UncompressedNameStrings;
> +    return 0;
> +  }
> +  SmallVector<char, 128> CompressedNameStrings;
> +  zlib::Status Success =
> +      zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
> +                     zlib::BestSizeCompression);
> +  assert(Success == zlib::StatusOK);

This assert is fine for debugging but I don't think it belongs in-tree.


> +  if (Success != zlib::StatusOK)
> +    return 1;
> +  EncLen = encodeULEB128(CompressedNameStrings.size(), P);
> +  P += EncLen;
> +  Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);

Since this line is needed twice, why not lift it into a lambda like:

auto AppendLengthsToResult = [&] {
  // Store the uncompressed and compressed name string lengths in \p Result.
  char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
  unsigned HeaderLen = P - &Header[0];
  Result.append(HeaderStr, HeaderLen);
};


> +  Result +=
> +      std::string(CompressedNameStrings.data(), CompressedNameStrings.size());
> +  return 0;
> +}
> +
> +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
> +                              std::string &Result) {
> +  std::vector<std::string> NameStrs;
> +  for (auto *NameVar : NameVars) {
> +    auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
> +    StringRef NameStr =
> +        Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
> +    NameStrs.push_back(NameStr.str());
> +  }
> +  return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
> +}
> +
> +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
> +  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
> +  const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
> +                                                          NameStrings.size());
> +  while (P < EndP) {
> +    uint32_t N;
> +    uint64_t UncompressedSize = decodeULEB128(P, &N);
> +    P += N;
> +    uint64_t CompressedSize = decodeULEB128(P, &N);
> +    P += N;
> +    bool isCompressed = (CompressedSize != 0);
> +    SmallString<128> UncompressedNameStrings;
> +    StringRef NameStrings;
> +    if (isCompressed) {
> +      StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
> +                                      CompressedSize);
> +      if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
> +                           UncompressedSize) != zlib::StatusOK)
> +        return 1;
> +      P += CompressedSize;
> +      NameStrings = StringRef(UncompressedNameStrings.data(),
> +                              UncompressedNameStrings.size());
> +    } else {
> +      NameStrings =
> +          StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
> +      P += UncompressedSize;
> +    }

--

> +    // Now parse the name strings.
> +    size_t NameStart = 0;
> +    bool isLast = false;
> +    do {
> +      size_t NameStop = NameStrings.find(' ', NameStart);
> +      if (NameStop == StringRef::npos)
> +        return 1;
> +      if (NameStop == NameStrings.size() - 1)
> +        isLast = true;
> +      StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
> +      Symtab.addFuncName(Name);
> +      if (isLast)
> +        break;
> +      NameStart = NameStop + 1;
> +    } while (true);

--

It would be simpler to do this:

llvm::SmallVector<StringRef, 0> Names;
NameStrings.split(Names, ' ');
for (StringRef &Name : Names) Symtab.addFuncName(Name);


> +
> +    while (P < EndP && *P == 0)
> +      P++;
> +  }
> +  Symtab.finalizeSymtab();
> +  return 0;
> +}
> +
> instrprof_error
> InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
>                                          uint64_t Weight) {
> 
> Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=256667&r1=256666&r2=256667&view=diff
> ==============================================================================
> --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
> +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Thu Dec 31 01:57:16 2015
> @@ -9,6 +9,7 @@
> 
> #include "llvm/ProfileData/InstrProfReader.h"
> #include "llvm/ProfileData/InstrProfWriter.h"
> +#include "llvm/Support/Compression.h"
> #include "gtest/gtest.h"
> 
> #include <cstdarg>
> @@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_
>   ASSERT_EQ(StringRef("bar3"), R);
> }
> 
> +TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
> +  std::vector<std::string> FuncNames1;
> +  std::vector<std::string> FuncNames2;
> +  for (int I = 0; I < 10 * 1024; I++) {
> +    std::string str;
> +    raw_string_ostream OS(str);
> +    OS << "func_" << I;
> +    FuncNames1.push_back(OS.str());
> +    str.clear();
> +    OS << "fooooooooooooooo_" << I;
> +    FuncNames1.push_back(OS.str());
> +    str.clear();
> +    OS << "BAR_" << I;
> +    FuncNames2.push_back(OS.str());
> +    str.clear();
> +    OS << "BlahblahBlahblahBar_" << I;
> +    FuncNames2.push_back(OS.str());
> +  }
> +
> +  for (int Padding = 0; Padding < 10; Padding++) {
> +    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
> +      // Compressing:
> +      std::string FuncNameStrings1;
> +      collectPGOFuncNameStrings(FuncNames1,
> +                                (DoCompression != 0 && zlib::isAvailable()),
> +                                FuncNameStrings1);
> +
> +      // Compressing:
> +      std::string FuncNameStrings2;
> +      collectPGOFuncNameStrings(FuncNames2,
> +                                (DoCompression != 0 && zlib::isAvailable()),
> +                                FuncNameStrings2);
> +
> +      // Join with paddings:
> +      std::string FuncNameStrings = FuncNameStrings1;
> +      for (int P = 0; P < Padding; P++) {
> +        FuncNameStrings.push_back('\0');
> +      }
> +      FuncNameStrings += FuncNameStrings2;
> +
> +      // Now decompress
> +      InstrProfSymtab Symtab;
> +      Symtab.create(StringRef(FuncNameStrings));
> +
> +      // Now check
> +      for (int I = 0; I < 10 * 1024; I++) {
> +        std::string N[4];
> +        N[0] = FuncNames1[2 * I];
> +        N[1] = FuncNames1[2 * I + 1];
> +        N[2] = FuncNames2[2 * I];
> +        N[3] = FuncNames2[2 * I + 1];
> +        for (int J = 0; J < 4; J++) {
> +          StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
> +          ASSERT_EQ(StringRef(N[J]), R);
> +        }
> +      }
> +    }
> +  }
> +}
> +
> } // end anonymous namespace
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


thanks
vedant



More information about the llvm-commits mailing list