[llvm] r256667 - [PGO]: Implement Func PGO name string compression

Mon Jan 4 14:15:32 PST 2016

Good point about the error handling. I will add that later in the
client code where more context is available.

thanks,

David

On Mon, Jan 4, 2016 at 10:36 AM, Pete Cooper <peter_cooper at apple.com> wrote:
>
> On Jan 4, 2016, at 10:28 AM, Vedant Kumar via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>
> Hi David,
>
> I have a few comments on this (inline).
>
> +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
> +                              bool doCompression, std::string &Result) {
> +  uint8_t Header[16], *P = Header;
> +  std::string UncompressedNameStrings;
> +
> +  for (auto NameStr : NameStrs) {
> +    UncompressedNameStrings += NameStr;
> +    UncompressedNameStrings.append(" ");
> +  }
>
>
> The complexity of this loop is quadratic in the average name length.
>
> Wdyt of (1) summing the name lengths, (2) allocating a char buffer,
> and (3) copying in the NameStrs manually?
>
>
> +  unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
> +  P += EncLen;
> +  if (!doCompression) {
> +    EncLen = encodeULEB128(0, P);
> +    P += EncLen;
> +    Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
>
> This last line is tricky to read. See my comment below for an alternative.
>
>
> +    Result += UncompressedNameStrings;
> +    return 0;
> +  }
> +  SmallVector<char, 128> CompressedNameStrings;
> +  zlib::Status Success =
> +      zlib::compress(StringRef(UncompressedNameStrings),
> CompressedNameStrings,
> +                     zlib::BestSizeCompression);
> +  assert(Success == zlib::StatusOK);
>
>
> This assert is fine for debugging but I don't think it belongs in-tree.
>
> Actually, we might need something more powerful than an assert here.
>
> Its possible for a zlib enabled compiler to generate a compressed PGO file,
> which a non-zlib enabled LLVM then tries (and fails) to read.
>
> In the uncompress case, we should probably give a nice error to the user if
> they try that combination of tools, as otherwise they
> will just get a generic error that something went wrong.
>
> Cheers,
> Pete
>
>
>
> +  if (Success != zlib::StatusOK)
> +    return 1;
> +  EncLen = encodeULEB128(CompressedNameStrings.size(), P);
> +  P += EncLen;
> +  Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
>
> Since this line is needed twice, why not lift it into a lambda like:
>
> auto AppendLengthsToResult = [&] {
>  // Store the uncompressed and compressed name string lengths in \p Result.
>  char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
>  unsigned HeaderLen = P - &Header[0];
>  Result.append(HeaderStr, HeaderLen);
> };
>
>
> +  Result +=
> +      std::string(CompressedNameStrings.data(),
> CompressedNameStrings.size());
> +  return 0;
> +}
> +
> +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *>
> &NameVars,
> +                              std::string &Result) {
> +  std::vector<std::string> NameStrs;
> +  for (auto *NameVar : NameVars) {
> +    auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
> +    StringRef NameStr =
> +        Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
> +    NameStrs.push_back(NameStr.str());
> +  }
> +  return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
> +}
> +
> +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab)
> {
> +  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
> +  const uint8_t *EndP = reinterpret_cast<const uint8_t
> *>(NameStrings.data() +
> +
> NameStrings.size());
> +  while (P < EndP) {
> +    uint32_t N;
> +    uint64_t UncompressedSize = decodeULEB128(P, &N);
> +    P += N;
> +    uint64_t CompressedSize = decodeULEB128(P, &N);
> +    P += N;
> +    bool isCompressed = (CompressedSize != 0);
> +    SmallString<128> UncompressedNameStrings;
> +    StringRef NameStrings;
> +    if (isCompressed) {
> +      StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
> +                                      CompressedSize);
> +      if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
> +                           UncompressedSize) != zlib::StatusOK)
> +        return 1;
> +      P += CompressedSize;
> +      NameStrings = StringRef(UncompressedNameStrings.data(),
> +                              UncompressedNameStrings.size());
> +    } else {
> +      NameStrings =
> +          StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
> +      P += UncompressedSize;
> +    }
>
>
> --
>
> +    // Now parse the name strings.
> +    size_t NameStart = 0;
> +    bool isLast = false;
> +    do {
> +      size_t NameStop = NameStrings.find(' ', NameStart);
> +      if (NameStop == StringRef::npos)
> +        return 1;
> +      if (NameStop == NameStrings.size() - 1)
> +        isLast = true;
> +      StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
> +      Symtab.addFuncName(Name);
> +      if (isLast)
> +        break;
> +      NameStart = NameStop + 1;
> +    } while (true);
>
>
> --
>
> It would be simpler to do this:
>
> llvm::SmallVector<StringRef, 0> Names;
> NameStrings.split(Names, ' ');
> for (StringRef &Name : Names) Symtab.addFuncName(Name);
>
>
> +
> +    while (P < EndP && *P == 0)
> +      P++;
> +  }
> +  Symtab.finalizeSymtab();
> +  return 0;
> +}
> +
> instrprof_error
> InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
>                                         uint64_t Weight) {
>
> Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=256667&r1=256666&r2=256667&view=diff
> ==============================================================================
> --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
> +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Thu Dec 31 01:57:16
> 2015
> @@ -9,6 +9,7 @@
>
> #include "llvm/ProfileData/InstrProfReader.h"
> #include "llvm/ProfileData/InstrProfWriter.h"
> +#include "llvm/Support/Compression.h"
> #include "gtest/gtest.h"
>
> #include <cstdarg>
> @@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_
>  ASSERT_EQ(StringRef("bar3"), R);
> }
>
> +TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
> +  std::vector<std::string> FuncNames1;
> +  std::vector<std::string> FuncNames2;
> +  for (int I = 0; I < 10 * 1024; I++) {
> +    std::string str;
> +    raw_string_ostream OS(str);
> +    OS << "func_" << I;
> +    FuncNames1.push_back(OS.str());
> +    str.clear();
> +    OS << "fooooooooooooooo_" << I;
> +    FuncNames1.push_back(OS.str());
> +    str.clear();
> +    OS << "BAR_" << I;
> +    FuncNames2.push_back(OS.str());
> +    str.clear();
> +    OS << "BlahblahBlahblahBar_" << I;
> +    FuncNames2.push_back(OS.str());
> +  }
> +
> +  for (int Padding = 0; Padding < 10; Padding++) {
> +    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
> +      // Compressing:
> +      std::string FuncNameStrings1;
> +      collectPGOFuncNameStrings(FuncNames1,
> +                                (DoCompression != 0 &&
> zlib::isAvailable()),
> +                                FuncNameStrings1);
> +
> +      // Compressing:
> +      std::string FuncNameStrings2;
> +      collectPGOFuncNameStrings(FuncNames2,
> +                                (DoCompression != 0 &&
> zlib::isAvailable()),
> +                                FuncNameStrings2);
> +
> +      // Join with paddings:
> +      std::string FuncNameStrings = FuncNameStrings1;
> +      for (int P = 0; P < Padding; P++) {
> +        FuncNameStrings.push_back('\0');
> +      }
> +      FuncNameStrings += FuncNameStrings2;
> +
> +      // Now decompress
> +      InstrProfSymtab Symtab;
> +      Symtab.create(StringRef(FuncNameStrings));
> +
> +      // Now check
> +      for (int I = 0; I < 10 * 1024; I++) {
> +        std::string N[4];
> +        N[0] = FuncNames1[2 * I];
> +        N[1] = FuncNames1[2 * I + 1];
> +        N[2] = FuncNames2[2 * I];
> +        N[3] = FuncNames2[2 * I + 1];
> +        for (int J = 0; J < 4; J++) {
> +          StringRef R =
> Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
> +          ASSERT_EQ(StringRef(N[J]), R);
> +        }
> +      }
> +    }
> +  }
> +}
> +
> } // end anonymous namespace
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
>
> thanks
> vedant
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>