[llvm] r256667 - [PGO]: Implement Func PGO name string compression
Xinliang David Li via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 30 23:57:17 PST 2015
Author: davidxl
Date: Thu Dec 31 01:57:16 2015
New Revision: 256667
URL: http://llvm.org/viewvc/llvm-project?rev=256667&view=rev
Log:
[PGO]: Implement Func PGO name string compression
This is part of the effort/preparation to reduce the size of
instr-pgo (object, binary, memory footprint, and raw data).
The functionality is currently off by default and not yet
used by any clients.
Modified:
llvm/trunk/include/llvm/ProfileData/InstrProf.h
llvm/trunk/lib/ProfileData/InstrProf.cpp
llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
Modified: llvm/trunk/include/llvm/ProfileData/InstrProf.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/InstrProf.h?rev=256667&r1=256666&r2=256667&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/InstrProf.h (original)
+++ llvm/trunk/include/llvm/ProfileData/InstrProf.h Thu Dec 31 01:57:16 2015
@@ -160,6 +160,29 @@ GlobalVariable *createPGOFuncNameVar(Mod
/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
+/// Given a vector of strings (function PGO names) \c NameStrs, the
+/// method generates a combined string \c Result that is ready to be
+/// serialized. The \c Result string is comprised of three fields:
+/// The first field is the length of the uncompressed strings, and
+/// the second field is the length of the zlib-compressed string.
+/// Both fields are encoded in ULEB128. If \c doCompression is false, the
+/// third field is the uncompressed strings; otherwise it is the
+/// compressed string. When the string compression is off, the
+/// second field will have value zero.
+int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+ bool doCompression, std::string &Result);
+/// Produce \c Result string with the same format described above. The input
+/// is a vector of PGO function name variables that are referenced.
+int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+ std::string &Result);
+class InstrProfSymtab;
+/// \c NameStrings is a string composed of one or more sub-strings encoded in
+/// the
+/// format described above. The substrings are separated by 0 or more zero
+/// bytes.
+/// This method decodes the string and populates the \c Symtab.
+int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
+
const std::error_category &instrprof_category();
enum class instrprof_error {
@@ -235,6 +258,11 @@ public:
/// This interface is used by reader of CoverageMapping test
/// format.
inline std::error_code create(StringRef D, uint64_t BaseAddr);
+ /// \c NameStrings is a string composed of one or more sub-strings
+ /// encoded in the format described above. The substrings are
+ /// separated by 0 or more zero bytes. This method decodes the
+ /// string and populates the \c Symtab.
+ inline std::error_code create(StringRef NameStrings);
/// Create InstrProfSymtab from a set of names iteratable from
/// \p IterRange. This interface is used by IndexedProfReader.
template <typename NameIterRange> void create(const NameIterRange &IterRange);
@@ -255,8 +283,8 @@ public:
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
- /// Return function's PGO name from the function name's symabol
- /// address in the object file. If an error occurs, Return
+ /// Return function's PGO name from the function name's symbol
+ /// address in the object file. If an error occurs, return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
@@ -270,6 +298,12 @@ std::error_code InstrProfSymtab::create(
return std::error_code();
}
+/// Populate this symbol table from the serialized name blob \c NameStrings.
+/// A non-zero result from readPGOFuncNameStrings() is reported to the caller
+/// as instrprof_error::malformed; otherwise a default (success) error code is
+/// returned.
+std::error_code InstrProfSymtab::create(StringRef NameStrings) {
+  if (readPGOFuncNameStrings(NameStrings, *this) == 0)
+    return std::error_code();
+  return make_error_code(instrprof_error::malformed);
+}
+
template <typename NameIterRange>
void InstrProfSymtab::create(const NameIterRange &IterRange) {
for (auto Name : IterRange)
Modified: llvm/trunk/lib/ProfileData/InstrProf.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/InstrProf.cpp?rev=256667&r1=256666&r2=256667&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/InstrProf.cpp (original)
+++ llvm/trunk/lib/ProfileData/InstrProf.cpp Thu Dec 31 01:57:16 2015
@@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
@@ -162,6 +164,101 @@ GlobalVariable *createPGOFuncNameVar(Fun
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
}
+/// Serialize the PGO function names in \c NameStrs and append the encoding to
+/// \c Result.
+///
+/// Each name is followed by a single space and all of them are concatenated.
+/// The appended bytes are:
+///   ULEB128(uncompressed length) ULEB128(compressed length) payload
+/// When \c doCompression is false the second field is zero and the payload is
+/// the raw concatenated string; otherwise the payload is its zlib-compressed
+/// form.
+///
+/// \returns 0 on success, or 1 if zlib compression fails. \c Result is only
+/// appended to on success.
+int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+                              bool doCompression, std::string &Result) {
+  // Two ULEB128 fields; a 64-bit value needs at most 10 bytes when encoded,
+  // so reserve room for the worst case of both.
+  uint8_t Header[20], *P = Header;
+  std::string UncompressedNameStrings;
+
+  // Iterate by const reference so the names are not copied one by one.
+  for (const auto &NameStr : NameStrs) {
+    UncompressedNameStrings += NameStr;
+    UncompressedNameStrings.append(" ");
+  }
+  P += encodeULEB128(UncompressedNameStrings.length(), P);
+
+  if (!doCompression) {
+    // A zero "compressed length" marks the payload as uncompressed.
+    P += encodeULEB128(0, P);
+    Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
+    Result += UncompressedNameStrings;
+    return 0;
+  }
+
+  SmallVector<char, 128> CompressedNameStrings;
+  zlib::Status Success =
+      zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
+                     zlib::BestSizeCompression);
+  assert(Success == zlib::StatusOK);
+  if (Success != zlib::StatusOK)
+    return 1;
+
+  P += encodeULEB128(CompressedNameStrings.size(), P);
+  Result.append(reinterpret_cast<char *>(&Header[0]), P - &Header[0]);
+  // Append straight from the buffer instead of building a temporary string.
+  Result.append(CompressedNameStrings.data(), CompressedNameStrings.size());
+  return 0;
+}
+
+/// Collect the initializer string of every variable in \c NameVars and
+/// serialize them into \c Result through the string-vector overload.
+/// Compression is requested exactly when zlib::isAvailable() returns true.
+int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+                              std::string &Result) {
+  std::vector<std::string> Names;
+  for (GlobalVariable *Var : NameVars) {
+    auto *Init = cast<ConstantDataArray>(Var->getInitializer());
+    // Use the C-string accessor when the array qualifies as a C string;
+    // otherwise take the array contents as they are.
+    if (Init->isCString())
+      Names.push_back(Init->getAsCString().str());
+    else
+      Names.push_back(Init->getAsString().str());
+  }
+  const bool UseCompression = zlib::isAvailable();
+  return collectPGOFuncNameStrings(Names, UseCompression, Result);
+}
+
+/// Decode the serialized name blob \c NameStrings and add every function name
+/// it contains to \c Symtab.
+///
+/// The blob is a sequence of sub-strings, each laid out as
+///   ULEB128(uncompressed length) ULEB128(compressed length) payload
+/// with any number of zero bytes allowed after each sub-string. A compressed
+/// length of zero means the payload is stored uncompressed. Inside a payload
+/// every name is terminated by a single space.
+///
+/// \returns 0 on success (after calling Symtab.finalizeSymtab()), or 1 if the
+/// input is malformed or decompression fails. On failure, names decoded
+/// before the error have already been added and finalizeSymtab() is not
+/// called.
+int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
+  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
+  const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() +
+                                                          NameStrings.size());
+  while (P < EndP) {
+    // NOTE(review): decodeULEB128 is called without an end bound here, so a
+    // truncated header can still be over-read before the checks below run.
+    uint32_t N;
+    uint64_t UncompressedSize = decodeULEB128(P, &N);
+    P += N;
+    // The second header field must start inside the buffer.
+    if (P >= EndP)
+      return 1;
+    uint64_t CompressedSize = decodeULEB128(P, &N);
+    P += N;
+    if (P > EndP)
+      return 1;
+
+    const bool isCompressed = (CompressedSize != 0);
+    // Reject payload sizes that run past the end of the input instead of
+    // reading out of bounds.
+    const uint64_t PayloadSize =
+        isCompressed ? CompressedSize : UncompressedSize;
+    if (PayloadSize > static_cast<uint64_t>(EndP - P))
+      return 1;
+
+    SmallString<128> UncompressedNameStrings;
+    // Names of the current sub-string (named so it does not shadow the
+    // NameStrings parameter).
+    StringRef Names;
+    if (isCompressed) {
+      StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
+                                      CompressedSize);
+      if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
+                           UncompressedSize) != zlib::StatusOK)
+        return 1;
+      P += CompressedSize;
+      Names = StringRef(UncompressedNameStrings.data(),
+                        UncompressedNameStrings.size());
+    } else {
+      Names = StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
+      P += UncompressedSize;
+    }
+
+    // Now parse the name strings. Every name must be terminated by a space,
+    // so a missing terminator (including an empty payload) is malformed.
+    size_t NameStart = 0;
+    do {
+      const size_t NameStop = Names.find(' ', NameStart);
+      if (NameStop == StringRef::npos)
+        return 1;
+      Symtab.addFuncName(Names.substr(NameStart, NameStop - NameStart));
+      NameStart = NameStop + 1;
+    } while (NameStart < Names.size());
+
+    // Skip zero-byte padding between sub-strings.
+    while (P < EndP && *P == 0)
+      P++;
+  }
+  Symtab.finalizeSymtab();
+  return 0;
+}
+
instrprof_error
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
uint64_t Weight) {
Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=256667&r1=256666&r2=256667&view=diff
==============================================================================
--- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
+++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Thu Dec 31 01:57:16 2015
@@ -9,6 +9,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/Support/Compression.h"
#include "gtest/gtest.h"
#include <cstdarg>
@@ -583,4 +584,64 @@ TEST_F(InstrProfTest, instr_prof_symtab_
ASSERT_EQ(StringRef("bar3"), R);
}
+// Round-trip test for the PGO name-string encoding: two independently
+// serialized name tables are joined with 0..9 zero bytes of padding, decoded
+// into one InstrProfSymtab, and every name is looked up by its hash. Runs
+// with compression off, and with compression on when zlib is available.
+TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
+  std::vector<std::string> FuncNames1;
+  std::vector<std::string> FuncNames2;
+  for (int I = 0; I < 10 * 1024; I++) {
+    std::string str;
+    raw_string_ostream OS(str);
+    OS << "func_" << I;
+    FuncNames1.push_back(OS.str());
+    str.clear();
+    OS << "fooooooooooooooo_" << I;
+    FuncNames1.push_back(OS.str());
+    str.clear();
+    OS << "BAR_" << I;
+    FuncNames2.push_back(OS.str());
+    str.clear();
+    OS << "BlahblahBlahblahBar_" << I;
+    FuncNames2.push_back(OS.str());
+  }
+
+  for (int Padding = 0; Padding < 10; Padding++) {
+    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
+      const bool Compress = DoCompression != 0 && zlib::isAvailable();
+
+      // Serialize both tables; a non-zero return means encoding failed.
+      std::string FuncNameStrings1;
+      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames1, Compress,
+                                             FuncNameStrings1));
+      std::string FuncNameStrings2;
+      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames2, Compress,
+                                             FuncNameStrings2));
+
+      // Join with paddings:
+      std::string FuncNameStrings = FuncNameStrings1;
+      for (int P = 0; P < Padding; P++) {
+        FuncNameStrings.push_back('\0');
+      }
+      FuncNameStrings += FuncNameStrings2;
+
+      // Now decode, and fail immediately if the decoder reports an error.
+      InstrProfSymtab Symtab;
+      std::error_code EC = Symtab.create(StringRef(FuncNameStrings));
+      ASSERT_TRUE(!EC);
+
+      // Now check that every name from both tables can be found by hash.
+      for (const std::vector<std::string> *Names : {&FuncNames1, &FuncNames2}) {
+        for (const std::string &Name : *Names) {
+          StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(Name));
+          ASSERT_EQ(StringRef(Name), R);
+        }
+      }
+    }
+  }
+}
+
} // end anonymous namespace
More information about the llvm-commits
mailing list