[llvm] r256667 - [PGO]: Implement Func PGO name string compression
Xinliang David Li via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 4 14:14:24 PST 2016
great comments. I have addresses all of them.
thanks,
David
On Mon, Jan 4, 2016 at 10:28 AM, Vedant Kumar <vsk at apple.com> wrote:
> Hi David,
>
> I have a few comments on this (inline).
>
>> +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
>> + bool doCompression, std::string &Result) {
>> + uint8_t Header[16], *P = Header;
>> + std::string UncompressedNameStrings;
>> +
>> + for (auto NameStr : NameStrs) {
>> + UncompressedNameStrings += NameStr;
>> + UncompressedNameStrings.append(" ");
>> + }
>
> The complexity of this loop is quadratic in the average name length.
>
> Wdyt of (1) summing the name lengths, (2) allocating a char buffer,
> and (3) copying in the NameStrs manually?
>
>
>> + unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
>> + P += EncLen;
>> + if (!doCompression) {
>> + EncLen = encodeULEB128(0, P);
>> + P += EncLen;
>> + Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
> This last line is tricky to read. See my comment below for an alternative.
>
>
>> + Result += UncompressedNameStrings;
>> + return 0;
>> + }
>> + SmallVector<char, 128> CompressedNameStrings;
>> + zlib::Status Success =
>> + zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
>> + zlib::BestSizeCompression);
>> + assert(Success == zlib::StatusOK);
>
> This assert is fine for debugging but I don't think it belongs in-tree.
>
>
>> + if (Success != zlib::StatusOK)
>> + return 1;
>> + EncLen = encodeULEB128(CompressedNameStrings.size(), P);
>> + P += EncLen;
>> + Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
>
> Since this line is needed twice, why not lift it into a lambda like:
>
> auto AppendLengthsToResult = [&] {
> // Store the uncompressed and compressed name string lengths in \p Result.
> char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
> unsigned HeaderLen = P - &Header[0];
> Result.append(HeaderStr, HeaderLen);
> };
>
>
>> + Result +=
>> + std::string(CompressedNameStrings.data(), CompressedNameStrings.size());
>> + return 0;
>> +}
>> +
>> +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
>> + std::string &Result) {
>> + std::vector<std::string> NameStrs;
>> + for (auto *NameVar : NameVars) {
>> + auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
>> + StringRef NameStr =
>> + Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
>> + NameStrs.push_back(NameStr.str());
>> + }
>> + return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
>> +}
>> +
>> +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
>> + const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
>> + const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
>> + NameStrings.size());
>> + while (P < EndP) {
>> + uint32_t N;
>> + uint64_t UncompressedSize = decodeULEB128(P, &N);
>> + P += N;
>> + uint64_t CompressedSize = decodeULEB128(P, &N);
>> + P += N;
>> + bool isCompressed = (CompressedSize != 0);
>> + SmallString<128> UncompressedNameStrings;
>> + StringRef NameStrings;
>> + if (isCompressed) {
>> + StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
>> + CompressedSize);
>> + if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
>> + UncompressedSize) != zlib::StatusOK)
>> + return 1;
>> + P += CompressedSize;
>> + NameStrings = StringRef(UncompressedNameStrings.data(),
>> + UncompressedNameStrings.size());
>> + } else {
>> + NameStrings =
>> + StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
>> + P += UncompressedSize;
>> + }
>
> --
>
>> + // Now parse the name strings.
>> + size_t NameStart = 0;
>> + bool isLast = false;
>> + do {
>> + size_t NameStop = NameStrings.find(' ', NameStart);
>> + if (NameStop == StringRef::npos)
>> + return 1;
>> + if (NameStop == NameStrings.size() - 1)
>> + isLast = true;
>> + StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
>> + Symtab.addFuncName(Name);
>> + if (isLast)
>> + break;
>> + NameStart = NameStop + 1;
>> + } while (true);
>
> --
>
> It would be simpler to do this:
>
> llvm::SmallVector<StringRef, 0> Names;
> NameStrings.split(Names, ' ');
> for (StringRef &Name : Names) Symtab.addFuncName(Name);
>
>
>> +
>> + while (P < EndP && *P == 0)
>> + P++;
>> + }
>> + Symtab.finalizeSymtab();
>> + return 0;
>> +}
>> +
>> instrprof_error
>> InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
>> uint64_t Weight) {
>>
>> Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=256667&r1=256666&r2=256667&view=diff
>> ==============================================================================
>> --- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
>> +++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Thu Dec 31 01:57:16 2015
>> @@ -9,6 +9,7 @@
>>
>> #include "llvm/ProfileData/InstrProfReader.h"
>> #include "llvm/ProfileData/InstrProfWriter.h"
>> +#include "llvm/Support/Compression.h"
>> #include "gtest/gtest.h"
>>
>> #include <cstdarg>
>> @@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_
>> ASSERT_EQ(StringRef("bar3"), R);
>> }
>>
>> +TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
>> + std::vector<std::string> FuncNames1;
>> + std::vector<std::string> FuncNames2;
>> + for (int I = 0; I < 10 * 1024; I++) {
>> + std::string str;
>> + raw_string_ostream OS(str);
>> + OS << "func_" << I;
>> + FuncNames1.push_back(OS.str());
>> + str.clear();
>> + OS << "fooooooooooooooo_" << I;
>> + FuncNames1.push_back(OS.str());
>> + str.clear();
>> + OS << "BAR_" << I;
>> + FuncNames2.push_back(OS.str());
>> + str.clear();
>> + OS << "BlahblahBlahblahBar_" << I;
>> + FuncNames2.push_back(OS.str());
>> + }
>> +
>> + for (int Padding = 0; Padding < 10; Padding++) {
>> + for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
>> + // Compressing:
>> + std::string FuncNameStrings1;
>> + collectPGOFuncNameStrings(FuncNames1,
>> + (DoCompression != 0 && zlib::isAvailable()),
>> + FuncNameStrings1);
>> +
>> + // Compressing:
>> + std::string FuncNameStrings2;
>> + collectPGOFuncNameStrings(FuncNames2,
>> + (DoCompression != 0 && zlib::isAvailable()),
>> + FuncNameStrings2);
>> +
>> + // Join with paddings:
>> + std::string FuncNameStrings = FuncNameStrings1;
>> + for (int P = 0; P < Padding; P++) {
>> + FuncNameStrings.push_back('\0');
>> + }
>> + FuncNameStrings += FuncNameStrings2;
>> +
>> + // Now decompress
>> + InstrProfSymtab Symtab;
>> + Symtab.create(StringRef(FuncNameStrings));
>> +
>> + // Now check
>> + for (int I = 0; I < 10 * 1024; I++) {
>> + std::string N[4];
>> + N[0] = FuncNames1[2 * I];
>> + N[1] = FuncNames1[2 * I + 1];
>> + N[2] = FuncNames2[2 * I];
>> + N[3] = FuncNames2[2 * I + 1];
>> + for (int J = 0; J < 4; J++) {
>> + StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
>> + ASSERT_EQ(StringRef(N[J]), R);
>> + }
>> + }
>> + }
>> + }
>> +}
>> +
>> } // end anonymous namespace
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
> thanks
> vedant
>
More information about the llvm-commits
mailing list