[lld] r209844 - [mach-o] Add support for parsing CFString sections

Nick Kledzik kledzik at apple.com
Thu May 29 13:44:21 PDT 2014


Author: kledzik
Date: Thu May 29 15:44:21 2014
New Revision: 209844

URL: http://llvm.org/viewvc/llvm-project?rev=209844&view=rev
Log:
[mach-o] Add support for parsing CFString sections

Added:
    lld/trunk/test/mach-o/parse-cfstring32.yaml
    lld/trunk/test/mach-o/parse-cfstring64.yaml
Modified:
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp?rev=209844&r1=209843&r2=209844&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp Thu May 29 15:44:21 2014
@@ -197,6 +197,9 @@ SectionInfo *Util::makeSection(DefinedAt
   case DefinedAtom::typeUTF16String:
      return new (_allocator) SectionInfo("__TEXT", "__ustring",
                             S_REGULAR);
+  case DefinedAtom::typeCFString:
+     return new (_allocator) SectionInfo("__DATA", "__cfstring",
+                            S_REGULAR);
   default:
     llvm_unreachable("TO DO: add support for more sections");
     break;

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp?rev=209844&r1=209843&r2=209844&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp Thu May 29 15:44:21 2014
@@ -87,8 +87,12 @@ static void processSymbol(const Normaliz
   const Section &section = normalizedFile.sections[sym.sect - 1];
   uint64_t offset = sym.value - section.address;
   uint64_t size = nextSymbolAddress(normalizedFile, sym) - sym.value;
-  if (section.type == llvm::MachO::S_ZEROFILL){
+  if (section.type == llvm::MachO::S_ZEROFILL) {
     file.addZeroFillDefinedAtom(sym.name, atomScope(sym.scope), size, copyRefs);
+  }
+  else if ((section.type == llvm::MachO::S_CSTRING_LITERALS) &&
+          (sym.name[0] == 'L')) {
+    // Ignore L labels on cstrings.
   } else {
     ArrayRef<uint8_t> atomContent = section.content.slice(offset, size);
     DefinedAtom::Merge m = DefinedAtom::mergeNo;
@@ -111,37 +115,72 @@ static void processUndefindeSymbol(MachO
   }
 }
 
+// A __TEXT/__ustring section contains UTF16 strings.  Atom boundaries are
+// determined by finding the terminating 0x0000 in each string; one anonymous,
+// merge-by-content atom is created per string (terminator included).  Fails
+// if the section has an odd byte count or its last string is unterminated.
+static error_code processUTF16Section(MachOFile &file, const Section &section,
+                                      bool is64, bool copyRefs) {
+  // UTF16 code units are 2 bytes; evenness also keeps content[i+1] in bounds.
+  if ((section.content.size() % 2) != 0)
+    return make_dynamic_error_code(Twine("Section ") + section.segmentName
+                                 + "/" + section.sectionName
+                                 + " has a size that is not even");
+  unsigned offset = 0;
+  for (size_t i = 0, e = section.content.size(); i != e; i +=2) {
+    if ((section.content[i] == 0) && (section.content[i+1] == 0)) {
+      unsigned size = i - offset + 2;
+      ArrayRef<uint8_t> utf16Content = section.content.slice(offset, size);
+      file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit,
+                          DefinedAtom::typeUTF16String,
+                          DefinedAtom::mergeByContent, utf16Content, copyRefs);
+      offset = i + 2;
+    }
+  }
+  if (offset != section.content.size())
+    return make_dynamic_error_code(Twine("Section ") + section.segmentName
+                                   + "/" + section.sectionName
+                                   + " is supposed to contain 0x0000 terminated"
+                                   " UTF16 strings, but the last string in the "
+                                   "section is not zero terminated.");
+  return error_code::success();
+}
+
+// Splits a __DATA/__cfstring section into one anonymous atom per NS/CFString
+// object.  Each object is four pointers wide (16 or 32 bytes depending on
+// is64), so no symbols are needed; a size that is not a multiple is an error.
+static error_code processCFStringSection(MachOFile &file,const Section &section,
+                                      bool is64, bool copyRefs) {
+  const uint32_t objectSize = is64 ? 32 : 16;
+  const size_t sectionSize = section.content.size();
+  if ((sectionSize % objectSize) != 0) {
+    return make_dynamic_error_code(Twine("Section __DATA/__cfstring has a size "
+                                   "(" + Twine(sectionSize)
+                                   + ") that is not a multiple of "
+                                   + Twine(objectSize)));
+  }
+  // sectionSize is a multiple of objectSize, so the walk ends exactly at it.
+  for (size_t start = 0; start != sectionSize; start += objectSize) {
+    file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit,
+                        DefinedAtom::typeCFString, DefinedAtom::mergeByContent,
+                        section.content.slice(start, objectSize), copyRefs);
+  }
+  return error_code::success();
+}
+
 static error_code processSection(MachOFile &file, const Section &section,
                                  bool is64, bool copyRefs) {
   unsigned offset = 0;
   const unsigned pointerSize = (is64 ? 8 : 4);
   switch (section.type) {
   case llvm::MachO::S_REGULAR:
-    if (section.segmentName.equals("__TEXT") && 
+    if (section.segmentName.equals("__TEXT") &&
         section.sectionName.equals("__ustring")) {
-      if ((section.content.size() % 4) != 0)
-        return make_dynamic_error_code(Twine("Section ") + section.segmentName
-                                     + "/" + section.sectionName 
-                                     + " has a size that is not even"); 
-      for (size_t i = 0, e = section.content.size(); i != e; i +=2) {
-        if ((section.content[i] == 0) && (section.content[i+1] == 0)) {
-          unsigned size = i - offset + 2;
-          ArrayRef<uint8_t> utf16Content = section.content.slice(offset, size);
-          file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit,
-                              DefinedAtom::typeUTF16String,
-                              DefinedAtom::mergeByContent, utf16Content,
-                              copyRefs);
-          offset = i + 2;
-        }
-      }
-      if (offset != section.content.size()) {
-        return make_dynamic_error_code(Twine("Section ") + section.segmentName
-                                       + "/" + section.sectionName 
-                                       + " is supposed to contain 0x0000 "
-                                       "terminated UTF16 strings, but the "
-                                       "last string in the section is not zero "
-                                       "terminated."); 
-      }
+      return processUTF16Section(file, section, is64, copyRefs);
+    }
+    else if (section.segmentName.equals("__DATA") &&
+             section.sectionName.equals("__cfstring")) {
+      return processCFStringSection(file, section, is64, copyRefs);
     }
     break;
   case llvm::MachO::S_COALESCED:

Added: lld/trunk/test/mach-o/parse-cfstring32.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/mach-o/parse-cfstring32.yaml?rev=209844&view=auto
==============================================================================
--- lld/trunk/test/mach-o/parse-cfstring32.yaml (added)
+++ lld/trunk/test/mach-o/parse-cfstring32.yaml Thu May 29 15:44:21 2014
@@ -0,0 +1,78 @@
+# RUN: lld -flavor darwin -arch i386 -r -print_atoms %s -o %t  | FileCheck %s
+#
+# Test parsing of CFString constants.
+#
+
+--- !mach-o
+arch:            x86
+file-type:       MH_OBJECT
+flags:           [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+has-UUID:        false
+OS:              unknown
+sections:
+  - segment:         __TEXT
+    section:         __cstring
+    type:            S_CSTRING_LITERALS
+    attributes:      [  ]
+    address:         0x0000000000000000
+    content:         [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68,
+                       0x65, 0x72, 0x65, 0x00 ]
+  - segment:         __DATA
+    section:         __cfstring
+    type:            S_REGULAR
+    attributes:      [  ]
+    alignment:       3
+    address:         0x0000000000000010
+    content:         [ 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00,
+                       0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ]
+    relocations:
+      - offset:          0x00000018
+        type:            GENERIC_RELOC_VANILLA
+        length:          2
+        pc-rel:          false
+        extern:          false
+        symbol:          1
+      - offset:          0x00000010
+        type:            GENERIC_RELOC_VANILLA
+        length:          2
+        pc-rel:          false
+        extern:          true
+        symbol:          1
+      - offset:          0x00000008
+        type:            GENERIC_RELOC_VANILLA
+        length:          2
+        pc-rel:          false
+        extern:          false
+        symbol:          1
+      - offset:          0x00000000
+        type:            GENERIC_RELOC_VANILLA
+        length:          2
+        pc-rel:          false
+        extern:          true
+        symbol:          1
+undefined-symbols:
+  - name:            ___CFConstantStringClassReference
+    type:            N_UNDF
+    scope:           [ N_EXT ]
+    value:           0x0000000000000000
+...
+
+# CHECK: defined-atoms:
+# CHECK:  - scope:           hidden
+# CHECK:    type:            c-string
+# CHECK:    content:         [ 68, 65, 6C, 6C, 6F, 00 ]
+# CHECK:    merge:           by-content
+# CHECK:  - scope:           hidden
+# CHECK:    type:            c-string
+# CHECK:    content:         [ 74, 68, 65, 72, 65, 00 ]
+# CHECK:    merge:           by-content
+# CHECK:  - scope:           hidden
+# CHECK:    type:            cfstring
+# CHECK:    merge:           by-content
+# CHECK:  - scope:           hidden
+# CHECK:    type:            cfstring
+# CHECK:    merge:           by-content
+# CHECK:undefined-atoms:
+# CHECK:  - name:            ___CFConstantStringClassReference

Added: lld/trunk/test/mach-o/parse-cfstring64.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/mach-o/parse-cfstring64.yaml?rev=209844&view=auto
==============================================================================
--- lld/trunk/test/mach-o/parse-cfstring64.yaml (added)
+++ lld/trunk/test/mach-o/parse-cfstring64.yaml Thu May 29 15:44:21 2014
@@ -0,0 +1,91 @@
+# RUN: lld -flavor darwin -arch x86_64 -r -print_atoms %s -o %t  | FileCheck %s
+#
+# Test parsing of CFString constants.
+#
+
+--- !mach-o
+arch:            x86_64
+file-type:       MH_OBJECT
+flags:           [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+has-UUID:        false
+OS:              unknown
+sections:
+  - segment:         __TEXT
+    section:         __cstring
+    type:            S_CSTRING_LITERALS
+    attributes:      [  ]
+    address:         0x0000000000000000
+    content:         [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68,
+                       0x65, 0x72, 0x65, 0x00 ]
+  - segment:         __DATA
+    section:         __cfstring
+    type:            S_REGULAR
+    attributes:      [  ]
+    alignment:       4
+    address:         0x0000000000000010
+    content:         [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                       0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ]
+    relocations:
+      - offset:          0x00000030
+        type:            X86_64_RELOC_UNSIGNED
+        length:          3
+        pc-rel:          false
+        extern:          true
+        symbol:          1
+      - offset:          0x00000020
+        type:            X86_64_RELOC_UNSIGNED
+        length:          3
+        pc-rel:          false
+        extern:          true
+        symbol:          2
+      - offset:          0x00000010
+        type:            X86_64_RELOC_UNSIGNED
+        length:          3
+        pc-rel:          false
+        extern:          true
+        symbol:          0
+      - offset:          0x00000000
+        type:            X86_64_RELOC_UNSIGNED
+        length:          3
+        pc-rel:          false
+        extern:          true
+        symbol:          2
+local-symbols:
+  - name:            Lstr1
+    type:            N_SECT
+    sect:            1
+    value:           0x0000000000000000
+  - name:            Lstr2
+    type:            N_SECT
+    sect:            1
+    value:           0x0000000000000006
+undefined-symbols:
+  - name:            ___CFConstantStringClassReference
+    type:            N_UNDF
+    scope:           [ N_EXT ]
+    value:           0x0000000000000000
+...
+
+# CHECK: defined-atoms:
+# CHECK:  - scope:           hidden
+# CHECK:    type:            c-string
+# CHECK:    content:         [ 68, 65, 6C, 6C, 6F, 00 ]
+# CHECK:    merge:           by-content
+# CHECK:  - scope:           hidden
+# CHECK:    type:            c-string
+# CHECK:    content:         [ 74, 68, 65, 72, 65, 00 ]
+# CHECK:    merge:           by-content
+# CHECK:  - scope:           hidden
+# CHECK:    type:            cfstring
+# CHECK:    merge:           by-content
+# CHECK:  - scope:           hidden
+# CHECK:    type:            cfstring
+# CHECK:    merge:           by-content
+# CHECK:undefined-atoms:
+# CHECK:  - name:            ___CFConstantStringClassReference





More information about the llvm-commits mailing list