[llvm] r201940 - MC: Support COFF string tables larger than 10MB

Nico Rieck nico.rieck at gmail.com
Sat Feb 22 08:12:20 PST 2014


Author: nrieck
Date: Sat Feb 22 10:12:20 2014
New Revision: 201940

URL: http://llvm.org/viewvc/llvm-project?rev=201940&view=rev
Log:
MC: Support COFF string tables larger than 10MB

Offsets past the range of single-slash encoding are encoded as base64,
padded to 6 characters, and prefixed with two slashes. This encoding is
undocumented but used by MSVC.

Modified:
    llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp
    llvm/trunk/lib/Object/COFFObjectFile.cpp
    llvm/trunk/test/MC/COFF/section-name-encoding.s

Modified: llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp?rev=201940&r1=201939&r2=201940&view=diff
==============================================================================
--- llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp (original)
+++ llvm/trunk/lib/MC/WinCOFFObjectWriter.cpp Sat Feb 22 10:12:20 2014
@@ -468,12 +468,35 @@ void WinCOFFObjectWriter::DefineSymbol(M
   }
 }
 
+// Encode a string table entry offset in base 64, padded to 6 chars, and
+// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
+// Buffer must be at least 8 bytes large. No terminating null appended.
+static void encodeBase64StringEntry(char* Buffer, uint64_t Value) {
+  assert(Value > 9999999 && Value <= 0xFFFFFFFFF &&
+         "Illegal section name encoding for value");
+
+  static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                 "abcdefghijklmnopqrstuvwxyz"
+                                 "0123456789+/";
+
+  Buffer[0] = '/';
+  Buffer[1] = '/';
+
+  char* Ptr = Buffer + 7;
+  for (unsigned i = 0; i < 6; ++i) {
+    unsigned Rem = Value % 64;
+    Value /= 64;
+    *(Ptr--) = Alphabet[Rem];
+  }
+}
+
 /// making a section real involves assigning it a number and putting its
 /// name into the string table if needed
 void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
   if (S.Name.size() > COFF::NameSize) {
     const unsigned Max6DecimalSize = 999999;
     const unsigned Max7DecimalSize = 9999999;
+    const uint64_t MaxBase64Size = 0xFFFFFFFFF; // 64^6, including 0
     uint64_t StringTableEntry = Strings.insert(S.Name.c_str());
 
     if (StringTableEntry <= Max6DecimalSize) {
@@ -484,8 +507,11 @@ void WinCOFFObjectWriter::MakeSectionRea
       char buffer[9] = { };
       std::sprintf(buffer, "/%d", unsigned(StringTableEntry));
       std::memcpy(S.Header.Name, buffer, 8);
+    } else if (StringTableEntry <= MaxBase64Size) {
+      // Starting with 10,000,000, offsets are encoded as base64.
+      encodeBase64StringEntry(S.Header.Name, StringTableEntry);
     } else {
-      report_fatal_error("COFF string table is greater than 9,999,999 bytes.");
+      report_fatal_error("COFF string table is greater than 64 GB.");
     }
   } else
     std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());

Modified: llvm/trunk/lib/Object/COFFObjectFile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Object/COFFObjectFile.cpp?rev=201940&r1=201939&r2=201940&view=diff
==============================================================================
--- llvm/trunk/lib/Object/COFFObjectFile.cpp (original)
+++ llvm/trunk/lib/Object/COFFObjectFile.cpp Sat Feb 22 10:12:20 2014
@@ -19,6 +19,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cctype>
+#include <limits>
 
 using namespace llvm;
 using namespace object;
@@ -52,6 +53,40 @@ static error_code getObject(const T *&Ob
   return object_error::success;
 }
 
+// Decode a string table entry in base 64 (//AAAAAA). Expects \arg Str without
+// prefixed slashes.
+static bool decodeBase64StringEntry(StringRef Str, uint32_t &Result) {
+  assert(Str.size() <= 6 && "String too long, possible overflow.");
+  if (Str.size() > 6)
+    return true;
+
+  uint64_t Value = 0;
+  while (!Str.empty()) {
+    unsigned CharVal;
+    if (Str[0] >= 'A' && Str[0] <= 'Z') // 0..25
+      CharVal = Str[0] - 'A';
+    else if (Str[0] >= 'a' && Str[0] <= 'z') // 26..51
+      CharVal = Str[0] - 'a' + 26;
+    else if (Str[0] >= '0' && Str[0] <= '9') // 52..61
+      CharVal = Str[0] - '0' + 52;
+    else if (Str[0] == '+') // 62
+      CharVal = Str[0] - '+' + 62;
+    else if (Str[0] == '/') // 63
+      CharVal = Str[0] - '/' + 63;
+    else
+      return true;
+
+    Value = (Value * 64) + CharVal;
+    Str = Str.substr(1);
+  }
+
+  if (Value > std::numeric_limits<uint32_t>::max())
+    return true;
+
+  Result = static_cast<uint32_t>(Value);
+  return false;
+}
+
 const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Ref) const {
   const coff_symbol *Addr = reinterpret_cast<const coff_symbol*>(Ref.p);
 
@@ -766,8 +801,13 @@ error_code COFFObjectFile::getSectionNam
   // Check for string table entry. First byte is '/'.
   if (Name[0] == '/') {
     uint32_t Offset;
-    if (Name.substr(1).getAsInteger(10, Offset))
-      return object_error::parse_failed;
+    if (Name[1] == '/') {
+      if (decodeBase64StringEntry(Name.substr(2), Offset))
+        return object_error::parse_failed;
+    } else {
+      if (Name.substr(1).getAsInteger(10, Offset))
+        return object_error::parse_failed;
+    }
     if (error_code EC = getString(Offset, Name))
       return EC;
   }

Modified: llvm/trunk/test/MC/COFF/section-name-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/COFF/section-name-encoding.s?rev=201940&r1=201939&r2=201940&view=diff
==============================================================================
--- llvm/trunk/test/MC/COFF/section-name-encoding.s (original)
+++ llvm/trunk/test/MC/COFF/section-name-encoding.s Sat Feb 22 10:12:20 2014
@@ -3,6 +3,7 @@
 // Encodings for different lengths:
 //   [0, 8]:                raw name
 //   (8, 9999999]:          base 10 string table index (/9999999)
+//   (9999999, 0xFFFFFFFF]: base 64 string table index (//AAAAAA)
 //
 // RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s | FileCheck %s
 
@@ -60,3 +61,28 @@ pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaa
 // CHECK:     Name: seven_digit (2F 31 30 30 30 30 32 39)
 // CHECK:   }
 .section seven_digit; .long 1
+
+
+// Generate padding sections to increase the string table size to at least
+// 10,000,000 bytes.
+.macro pad_sections_ex pad
+  // 9x \pad
+  pad_sections \pad\pad\pad\pad\pad\pad\pad\pad\pad
+.endm
+
+// 1000x 'a'
+pad_sections_ex aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!
 aaaaaaaaa
 aaaaaaaaaaaaaaaaaaa
+
+
+// //AAmJa4 == 1000029 + 12 + (5 * (2 + (9 * 20 * 10 * 1000) + 1)) == 38*64^3 + 9*64^2 + 26*64 + 56
+//             v         |     |    v    ~~~~~~~~~~~~~~~~~~    v
+// seven_digit offset    v     v   "p0"         pad            NUL separator
+//         "seven_digit\0"     # of pad sections
+//
+// "2F 2F 41 41 6D 4A 61 34" is "//AAmJa4", which decodes to "0 0 38 9 26 56".
+//
+// CHECK:   Section {
+// CHECK:     Number: 15
+// CHECK:     Name: double_slash (2F 2F 41 41 6D 4A 61 34)
+// CHECK:   }
+.section double_slash; .long 1





More information about the llvm-commits mailing list