[llvm] r256667 - [PGO]: Implement Func PGO name string compression
Xinliang David Li via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 4 14:15:32 PST 2016
Good point about the error handling. I will add that later in the
client code where more context is available.
thanks,
David
On Mon, Jan 4, 2016 at 10:36 AM, Pete Cooper <peter_cooper at apple.com> wrote:
>
> On Jan 4, 2016, at 10:28 AM, Vedant Kumar via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>
> Hi David,
>
> I have a few comments on this (inline).
>
> +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
> + bool doCompression, std::string &Result) {
> + uint8_t Header[16], *P = Header;
> + std::string UncompressedNameStrings;
> +
> + for (auto NameStr : NameStrs) {
> + UncompressedNameStrings += NameStr;
> + UncompressedNameStrings.append(" ");
> + }
>
>
> The complexity of this loop is quadratic in the average name length.
>
> Wdyt of (1) summing the name lengths, (2) allocating a char buffer,
> and (3) copying in the NameStrs manually?
>
>
> + unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
> + P += EncLen;
> + if (!doCompression) {
> + EncLen = encodeULEB128(0, P);
> + P += EncLen;
> + Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
>
> This last line is tricky to read. See my comment below for an alternative.
>
>
> + Result += UncompressedNameStrings;
> + return 0;
> + }
> + SmallVector<char, 128> CompressedNameStrings;
> + zlib::Status Success =
> + zlib::compress(StringRef(UncompressedNameStrings),
> CompressedNameStrings,
> + zlib::BestSizeCompression);
> + assert(Success == zlib::StatusOK);
>
>
> This assert is fine for debugging but I don't think it belongs in-tree.
>
> Actually, we might need something more powerful than an assert here.
>
> Its possible for a zlib enabled compiler to generate a compressed PGO file,
> which a non-zlib enabled LLVM then tries (and fails) to read.
>
> In the uncompress case, we should probably give a nice error to the user if
> they try that combination of tools, as otherwise they
> will just get a generic error that something went wrong.
>
> Cheers,
> Pete
>
>
>
> + if (Success != zlib::StatusOK)
> + return 1;
> + EncLen = encodeULEB128(CompressedNameStrings.size(), P);
> + P += EncLen;
> + Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
>
> Since this line is needed twice, why not lift it into a lambda like:
>
> auto AppendLengthsToResult = [&] {
> // Store the uncompressed and compressed name string lengths in \p Result.
> char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
> unsigned HeaderLen = P - &Header[0];
> Result.append(HeaderStr, HeaderLen);
> };
>
>
> + Result +=
> + std::string(CompressedNameStrings.data(),
> CompressedNameStrings.size());
> + return 0;
> +}
> +
> +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *>
> &NameVars,
> + std::string &Result) {
> + std::vector<std::string> NameStrs;
> + for (auto *NameVar : NameVars) {
> + auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
> + StringRef NameStr =
> + Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
> + NameStrs.push_back(NameStr.str());
> + }
> + return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
> +}
> +
> +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab)
> {
> + const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
> + const uint8_t *EndP = reinterpret_cast<const uint8_t
> *>(NameStrings.data() +
> +
> NameStrings.size());
> + while (P < EndP) {
> + uint32_t N;
> + uint64_t UncompressedSize = decodeULEB128(P, &N);
> + P += N;
> + uint64_t CompressedSize = decodeULEB128(P, &N);
> + P += N;
> + bool isCompressed = (CompressedSize != 0);
> + SmallString<128> UncompressedNameStrings;
> + StringRef NameStrings;
> + if (isCompressed) {
> + StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
> + CompressedSize);
> + if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
> + UncompressedSize) != zlib::StatusOK)
> + return 1;
> + P += CompressedSize;
> + NameStrings = StringRef(UncompressedNameStrings.data(),
> + UncompressedNameStrings.size());
> + } else {
> + NameStrings =
> + StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
> + P += UncompressedSize;
> + }
>
>
> --
>
> + // Now parse the name strings.
> + size_t NameStart = 0;
> + bool isLast = false;
> + do {
> + size_t NameStop = NameStrings.find(' ', NameStart);
> + if (NameStop == StringRef::npos)
> + return 1;
> + if (NameStop == NameStrings.size() - 1)
> + isLast = true;
> + StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
> + Symtab.addFuncName(Name);
> + if (isLast)
> + break;
> + NameStart = NameStop + 1;
> + } while (true);
>
>
> --
>
> It would be simpler to do this:
>
> llvm::SmallVector<StringRef, 0> Names;
> NameStrings.split(Names, ' ');
> for (StringRef &Name : Names) Symtab.addFuncName(Name);
>
>
> +
> + while (P < EndP && *P == 0)
> + P++;
> + }
> + Symtab.finalizeSymtab();
> + return 0;
> +}
> +
> instrprof_error
> InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
> uint64_t Weight) {
>
> Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=256667&r1=256666&r2=256667&view=diff
> ==============================================================================
> --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
> +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Thu Dec 31 01:57:16
> 2015
> @@ -9,6 +9,7 @@
>
> #include "llvm/ProfileData/InstrProfReader.h"
> #include "llvm/ProfileData/InstrProfWriter.h"
> +#include "llvm/Support/Compression.h"
> #include "gtest/gtest.h"
>
> #include <cstdarg>
> @@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_
> ASSERT_EQ(StringRef("bar3"), R);
> }
>
> +TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
> + std::vector<std::string> FuncNames1;
> + std::vector<std::string> FuncNames2;
> + for (int I = 0; I < 10 * 1024; I++) {
> + std::string str;
> + raw_string_ostream OS(str);
> + OS << "func_" << I;
> + FuncNames1.push_back(OS.str());
> + str.clear();
> + OS << "fooooooooooooooo_" << I;
> + FuncNames1.push_back(OS.str());
> + str.clear();
> + OS << "BAR_" << I;
> + FuncNames2.push_back(OS.str());
> + str.clear();
> + OS << "BlahblahBlahblahBar_" << I;
> + FuncNames2.push_back(OS.str());
> + }
> +
> + for (int Padding = 0; Padding < 10; Padding++) {
> + for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
> + // Compressing:
> + std::string FuncNameStrings1;
> + collectPGOFuncNameStrings(FuncNames1,
> + (DoCompression != 0 &&
> zlib::isAvailable()),
> + FuncNameStrings1);
> +
> + // Compressing:
> + std::string FuncNameStrings2;
> + collectPGOFuncNameStrings(FuncNames2,
> + (DoCompression != 0 &&
> zlib::isAvailable()),
> + FuncNameStrings2);
> +
> + // Join with paddings:
> + std::string FuncNameStrings = FuncNameStrings1;
> + for (int P = 0; P < Padding; P++) {
> + FuncNameStrings.push_back('\0');
> + }
> + FuncNameStrings += FuncNameStrings2;
> +
> + // Now decompress
> + InstrProfSymtab Symtab;
> + Symtab.create(StringRef(FuncNameStrings));
> +
> + // Now check
> + for (int I = 0; I < 10 * 1024; I++) {
> + std::string N[4];
> + N[0] = FuncNames1[2 * I];
> + N[1] = FuncNames1[2 * I + 1];
> + N[2] = FuncNames2[2 * I];
> + N[3] = FuncNames2[2 * I + 1];
> + for (int J = 0; J < 4; J++) {
> + StringRef R =
> Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
> + ASSERT_EQ(StringRef(N[J]), R);
> + }
> + }
> + }
> + }
> +}
> +
> } // end anonymous namespace
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
>
> thanks
> vedant
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
More information about the llvm-commits
mailing list