[llvm] r201940 - MC: Support COFF string tables larger than 10MB

Rui Ueyama ruiu at google.com
Mon Feb 24 22:32:01 PST 2014


On Sat, Feb 22, 2014 at 8:12 AM, Nico Rieck <nico.rieck at gmail.com> wrote:

> Author: nrieck
> Date: Sat Feb 22 10:12:20 2014
> New Revision: 201940
>
> URL: http://llvm.org/viewvc/llvm-project?rev=201940&view=rev
> Log:
> MC: Support COFF string tables larger than 10MB
>
> Offsets past the range of single-slash encoding are encoded as base64,
> padded to 6 characters, and prefixed with two slashes. This encoding is
> undocumented but used by MSVC.
>
> Modified:
>     llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp
>     llvm/trunk/lib/Object/COFFObjectFile.cpp
>     llvm/trunk/test/MC/COFF/section-name-encoding.s
>
> Modified: llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp?rev=201940&r1=201939&r2=201940&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp (original)
> +++ llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp Sat Feb 22 10:12:20 2014
> @@ -468,12 +468,35 @@ void WinCOFFObjectWriter::DefineSymbol(M
>    }
>  }
>
> +// Encode a string table entry offset in base 64, padded to 6 chars, and
> +// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
> +// Buffer must be at least 8 bytes large. No terminating null appended.
> +static void encodeBase64StringEntry(char* Buffer, uint64_t Value) {
> +  assert(Value > 9999999 && Value <= 0xFFFFFFFFF &&
> +         "Illegal section name encoding for value");
> +
> +  static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +                                 "abcdefghijklmnopqrstuvwxyz"
> +                                 "0123456789+/";
> +
> +  Buffer[0] = '/';
> +  Buffer[1] = '/';
> +
> +  char* Ptr = Buffer + 7;
> +  for (unsigned i = 0; i < 6; ++i) {
> +    unsigned Rem = Value % 64;
> +    Value /= 64;
> +    *(Ptr--) = Alphabet[Rem];
> +  }
> +}
> +
>  /// making a section real involves assigned it a number and putting
>  /// name into the string table if needed
>  void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
>    if (S.Name.size() > COFF::NameSize) {
>      const unsigned Max6DecimalSize = 999999;
>      const unsigned Max7DecimalSize = 9999999;
> +    const uint64_t MaxBase64Size = 0xFFFFFFFFF; // 64^6, including 0
>      uint64_t StringTableEntry = Strings.insert(S.Name.c_str());
>
>      if (StringTableEntry <= Max6DecimalSize) {
> @@ -484,8 +507,11 @@ void WinCOFFObjectWriter::MakeSectionRea
>        char buffer[9] = { };
>        std::sprintf(buffer, "/%d", unsigned(StringTableEntry));
>        std::memcpy(S.Header.Name, buffer, 8);
> +    } else if (StringTableEntry <= MaxBase64Size) {
> +      // Starting with 10,000,000, offsets are encoded as base64.
> +      encodeBase64StringEntry(S.Header.Name, StringTableEntry);
>      } else {
> -      report_fatal_error("COFF string table is greater than 9,999,999 bytes.");
> +      report_fatal_error("COFF string table is greater than 64 GB.");
>      }
>    } else
>      std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
>
> Modified: llvm/trunk/lib/Object/COFFObjectFile.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/COFFObjectFile.cpp?rev=201940&r1=201939&r2=201940&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Object/COFFObjectFile.cpp (original)
> +++ llvm/trunk/lib/Object/COFFObjectFile.cpp Sat Feb 22 10:12:20 2014
> @@ -19,6 +19,7 @@
>  #include "llvm/Support/Debug.h"
>  #include "llvm/Support/raw_ostream.h"
>  #include <cctype>
> +#include <limits>
>
>  using namespace llvm;
>  using namespace object;
> @@ -52,6 +53,40 @@ static error_code getObject(const T *&Ob
>    return object_error::success;
>  }
>
> +// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without
> +// prefixed slashes.
> +static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) {
> +  assert(Str.size() <= 6 && "String too long, possible overflow.");
> +  if (Str.size() > 6)
> +    return true;
> +
> +  uint64_t Value = 0;
> +  while (!Str.empty()) {
> +    unsigned CharVal;
> +    if (Str[0] >= 'A' && Str[0] <= 'Z') // 0..25
> +      CharVal = Str[0] - 'A';
> +    else if (Str[0] >= 'a' && Str[0] <= 'z') // 26..51
> +      CharVal = Str[0] - 'a' + 26;
> +    else if (Str[0] >= '0' && Str[0] <= '9') // 52..61
> +      CharVal = Str[0] - '0' + 52;
> +    else if (Str[0] == '+') // 62
> +      CharVal = Str[0] - '+' + 62;
>

Is this line equivalent to "CharVal = 62;"?

> +    else if (Str[0] == '/') // 63
> +      CharVal = Str[0] - '/' + 63;
>

Ditto: is this line equivalent to "CharVal = 63;"?


> +    else
> +      return true;
> +
> +    Value = (Value * 64) + CharVal;
> +    Str = Str.substr(1);
> +  }
> +
> +  if (Value > std::numeric_limits<uint32_t>::max())
> +    return true;
> +
> +  Result = static_cast<uint32_t>(Value);
> +  return false;
> +}
> +
>  const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Ref) const {
>    const coff_symbol *Addr = reinterpret_cast<const coff_symbol*>(Ref.p);
>
> @@ -766,8 +801,13 @@ error_code COFFObjectFile::getSectionNam
>    // Check for string table entry. First byte is '/'.
>    if (Name[0] == '/') {
>      uint32_t Offset;
> -    if (Name.substr(1).getAsInteger(10, Offset))
> -      return object_error::parse_failed;
> +    if (Name[1] == '/') {
> +      if (decodeBase64StringEntry(Name.substr(2), Offset))
> +        return object_error::parse_failed;
> +    } else {
> +      if (Name.substr(1).getAsInteger(10, Offset))
> +        return object_error::parse_failed;
> +    }
>      if (error_code EC = getString(Offset, Name))
>        return EC;
>    }
>
> Modified: llvm/trunk/test/MC/COFF/section-name-encoding.s
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/COFF/section-name-encoding.s?rev=201940&r1=201939&r2=201940&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/MC/COFF/section-name-encoding.s (original)
> +++ llvm/trunk/test/MC/COFF/section-name-encoding.s Sat Feb 22 10:12:20 2014
> @@ -3,6 +3,7 @@
>  // Encodings for different lengths:
>  //   [0, 8]:               raw name
>  //   (8, 999999]:          base 10 string table index (/9999999)
> +//   (999999, 0xFFFFFFFF]: base 64 string table index (//AAAAAA)
>  //
>  // RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s
>
> @@ -60,3 +61,28 @@ pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaa
>  // CHECK:     Name: seven_digit (2F 31 30 30 30 30 32 39)
>  // CHECK:   }
>  .section seven_digit; .long 1
> +
> +
> +// Generate padding sections to increase the string table size to at least
> +// 10,000,000 bytes.
> +.macro pad_sections_ex pad
> +  // 9x \pad
> +  pad_sections \pad\pad\pad\pad\pad\pad\pad\pad\pad
> +.endm
> +
> +// 1000x 'a'
> +pad_sections_ex
> aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!
>  aaaaaaaaa
>  aaaaaaaaaaaaaaaaaaa
> +
> +
> +// //AAmJa4 == 1000029 + 12 + (5 * (2 + (9 * 20 * 10 * 1000) + 1)) == 38*64^3 + 9*64^2 + 26*64 + 56
> +//             v         |     |    v    ~~~~~~~~~~~~~~~~~~    v
> +// seven_digit offset    v     v   "p0"         pad            NUL seperator
> +//         "seven_digit\0"     # of pad sections
> +//
> +// "2F 2F 41 41 6D 4A 61 34" is "//AAmJa4", which decodes to "0 0 38 9 26 56".
> +//
> +// CHECK:   Section {
> +// CHECK:     Number: 15
> +// CHECK:     Name: double_slash (2F 2F 41 41 6D 4A 61 34)
> +// CHECK:   }
> +.section double_slash; .long 1
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140224/b683a18f/attachment.html>


More information about the llvm-commits mailing list