[lld] r209844 - [mach-o] Add support for parsing CFString sections
Nick Kledzik
kledzik at apple.com
Thu May 29 13:44:21 PDT 2014
Author: kledzik
Date: Thu May 29 15:44:21 2014
New Revision: 209844
URL: http://llvm.org/viewvc/llvm-project?rev=209844&view=rev
Log:
[mach-o] Add support for parsing CFString sections
Added:
lld/trunk/test/mach-o/parse-cfstring32.yaml
lld/trunk/test/mach-o/parse-cfstring64.yaml
Modified:
lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp?rev=209844&r1=209843&r2=209844&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp Thu May 29 15:44:21 2014
@@ -197,6 +197,9 @@ SectionInfo *Util::makeSection(DefinedAt
case DefinedAtom::typeUTF16String:
return new (_allocator) SectionInfo("__TEXT", "__ustring",
S_REGULAR);
+ case DefinedAtom::typeCFString:
+ return new (_allocator) SectionInfo("__DATA", "__cfstring",
+ S_REGULAR);
default:
llvm_unreachable("TO DO: add support for more sections");
break;
Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp?rev=209844&r1=209843&r2=209844&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp Thu May 29 15:44:21 2014
@@ -87,8 +87,12 @@ static void processSymbol(const Normaliz
const Section &section = normalizedFile.sections[sym.sect - 1];
uint64_t offset = sym.value - section.address;
uint64_t size = nextSymbolAddress(normalizedFile, sym) - sym.value;
- if (section.type == llvm::MachO::S_ZEROFILL){
+ if (section.type == llvm::MachO::S_ZEROFILL) {
file.addZeroFillDefinedAtom(sym.name, atomScope(sym.scope), size, copyRefs);
+ }
+ else if ((section.type == llvm::MachO::S_CSTRING_LITERALS) &&
+ (sym.name[0] == 'L')) {
+ // Ignore L labels on cstrings.
} else {
ArrayRef<uint8_t> atomContent = section.content.slice(offset, size);
DefinedAtom::Merge m = DefinedAtom::mergeNo;
@@ -111,37 +115,72 @@ static void processUndefindeSymbol(MachO
}
}
+// A __TEXT/__ustring section contains UTF16 strings. Atom boundaries are
+// determined by finding the terminating 0x0000 in each string.
+static error_code processUTF16Section(MachOFile &file, const Section &section,
+ bool is64, bool copyRefs) {
+ if ((section.content.size() % 4) != 0)
+ return make_dynamic_error_code(Twine("Section ") + section.segmentName
+ + "/" + section.sectionName
+ + " has a size that is not even");
+ unsigned offset = 0;
+ for (size_t i = 0, e = section.content.size(); i != e; i +=2) {
+ if ((section.content[i] == 0) && (section.content[i+1] == 0)) {
+ unsigned size = i - offset + 2;
+ ArrayRef<uint8_t> utf16Content = section.content.slice(offset, size);
+ file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit,
+ DefinedAtom::typeUTF16String,
+ DefinedAtom::mergeByContent, utf16Content,
+ copyRefs);
+ offset = i + 2;
+ }
+ }
+ if (offset != section.content.size()) {
+ return make_dynamic_error_code(Twine("Section ") + section.segmentName
+ + "/" + section.sectionName
+ + " is supposed to contain 0x0000 "
+ "terminated UTF16 strings, but the "
+ "last string in the section is not zero "
+ "terminated.");
+ }
+ return error_code::success();
+}
+
+// A __DATA/__cfstring section contain NS/CFString objects. Atom boundaries
+// are determined because each object is known to be 4 pointers in size.
+static error_code processCFStringSection(MachOFile &file,const Section &section,
+ bool is64, bool copyRefs) {
+ const uint32_t cfsObjSize = (is64 ? 32 : 16);
+ if ((section.content.size() % cfsObjSize) != 0) {
+ return make_dynamic_error_code(Twine("Section __DATA/__cfstring has a size "
+ "(" + Twine(section.content.size())
+ + ") that is not a multiple of "
+ + Twine(cfsObjSize)));
+ }
+ unsigned offset = 0;
+ for (size_t i = 0, e = section.content.size(); i != e; i += cfsObjSize) {
+ ArrayRef<uint8_t> byteContent = section.content.slice(offset, cfsObjSize);
+ file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit,
+ DefinedAtom::typeCFString,
+ DefinedAtom::mergeByContent, byteContent, copyRefs);
+ offset += cfsObjSize;
+ }
+ return error_code::success();
+}
+
static error_code processSection(MachOFile &file, const Section &section,
bool is64, bool copyRefs) {
unsigned offset = 0;
const unsigned pointerSize = (is64 ? 8 : 4);
switch (section.type) {
case llvm::MachO::S_REGULAR:
- if (section.segmentName.equals("__TEXT") &&
+ if (section.segmentName.equals("__TEXT") &&
section.sectionName.equals("__ustring")) {
- if ((section.content.size() % 4) != 0)
- return make_dynamic_error_code(Twine("Section ") + section.segmentName
- + "/" + section.sectionName
- + " has a size that is not even");
- for (size_t i = 0, e = section.content.size(); i != e; i +=2) {
- if ((section.content[i] == 0) && (section.content[i+1] == 0)) {
- unsigned size = i - offset + 2;
- ArrayRef<uint8_t> utf16Content = section.content.slice(offset, size);
- file.addDefinedAtom(StringRef(), DefinedAtom::scopeLinkageUnit,
- DefinedAtom::typeUTF16String,
- DefinedAtom::mergeByContent, utf16Content,
- copyRefs);
- offset = i + 2;
- }
- }
- if (offset != section.content.size()) {
- return make_dynamic_error_code(Twine("Section ") + section.segmentName
- + "/" + section.sectionName
- + " is supposed to contain 0x0000 "
- "terminated UTF16 strings, but the "
- "last string in the section is not zero "
- "terminated.");
- }
+ return processUTF16Section(file, section, is64, copyRefs);
+ }
+ else if (section.segmentName.equals("__DATA") &&
+ section.sectionName.equals("__cfstring")) {
+ return processCFStringSection(file, section, is64, copyRefs);
}
break;
case llvm::MachO::S_COALESCED:
Added: lld/trunk/test/mach-o/parse-cfstring32.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/mach-o/parse-cfstring32.yaml?rev=209844&view=auto
==============================================================================
--- lld/trunk/test/mach-o/parse-cfstring32.yaml (added)
+++ lld/trunk/test/mach-o/parse-cfstring32.yaml Thu May 29 15:44:21 2014
@@ -0,0 +1,78 @@
+# RUN: lld -flavor darwin -arch i386 -r -print_atoms %s -o %t | FileCheck %s
+#
+# Test parsing of mach-o functions.
+#
+
+--- !mach-o
+arch: x86
+file-type: MH_OBJECT
+flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+has-UUID: false
+OS: unknown
+sections:
+ - segment: __TEXT
+ section: __cstring
+ type: S_CSTRING_LITERALS
+ attributes: [ ]
+ address: 0x0000000000000000
+ content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68,
+ 0x65, 0x72, 0x65, 0x00 ]
+ - segment: __DATA
+ section: __cfstring
+ type: S_REGULAR
+ attributes: [ ]
+ alignment: 3
+ address: 0x0000000000000010
+ content: [ 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xC8, 0x07, 0x00, 0x00,
+ 0x06, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00 ]
+ relocations:
+ - offset: 0x00000018
+ type: GENERIC_RELOC_VANILLA
+ length: 2
+ pc-rel: false
+ extern: false
+ symbol: 1
+ - offset: 0x00000010
+ type: GENERIC_RELOC_VANILLA
+ length: 2
+ pc-rel: false
+ extern: true
+ symbol: 1
+ - offset: 0x00000008
+ type: GENERIC_RELOC_VANILLA
+ length: 2
+ pc-rel: false
+ extern: false
+ symbol: 1
+ - offset: 0x00000000
+ type: GENERIC_RELOC_VANILLA
+ length: 2
+ pc-rel: false
+ extern: true
+ symbol: 1
+undefined-symbols:
+ - name: ___CFConstantStringClassReference
+ type: N_UNDF
+ scope: [ N_EXT ]
+ value: 0x0000000000000000
+...
+
+# CHECK: defined-atoms:
+# CHECK: - scope: hidden
+# CHECK: type: c-string
+# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ]
+# CHECK: merge: by-content
+# CHECK: - scope: hidden
+# CHECK: type: c-string
+# CHECK: content: [ 74, 68, 65, 72, 65, 00 ]
+# CHECK: merge: by-content
+# CHECK: - scope: hidden
+# CHECK: type: cfstring
+# CHECK: merge: by-content
+# CHECK: - scope: hidden
+# CHECK: type: cfstring
+# CHECK: merge: by-content
+# CHECK:undefined-atoms:
+# CHECK: - name: ___CFConstantStringClassReference
Added: lld/trunk/test/mach-o/parse-cfstring64.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/mach-o/parse-cfstring64.yaml?rev=209844&view=auto
==============================================================================
--- lld/trunk/test/mach-o/parse-cfstring64.yaml (added)
+++ lld/trunk/test/mach-o/parse-cfstring64.yaml Thu May 29 15:44:21 2014
@@ -0,0 +1,91 @@
+# RUN: lld -flavor darwin -arch x86_64 -r -print_atoms %s -o %t | FileCheck %s
+#
+# Test parsing of CFString constants.
+#
+
+--- !mach-o
+arch: x86_64
+file-type: MH_OBJECT
+flags: [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+has-UUID: false
+OS: unknown
+sections:
+ - segment: __TEXT
+ section: __cstring
+ type: S_CSTRING_LITERALS
+ attributes: [ ]
+ address: 0x0000000000000000
+ content: [ 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x00, 0x74, 0x68,
+ 0x65, 0x72, 0x65, 0x00 ]
+ - segment: __DATA
+ section: __cfstring
+ type: S_REGULAR
+ attributes: [ ]
+ alignment: 4
+ address: 0x0000000000000010
+ content: [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xC8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ]
+ relocations:
+ - offset: 0x00000030
+ type: X86_64_RELOC_UNSIGNED
+ length: 3
+ pc-rel: false
+ extern: true
+ symbol: 1
+ - offset: 0x00000020
+ type: X86_64_RELOC_UNSIGNED
+ length: 3
+ pc-rel: false
+ extern: true
+ symbol: 2
+ - offset: 0x00000010
+ type: X86_64_RELOC_UNSIGNED
+ length: 3
+ pc-rel: false
+ extern: true
+ symbol: 0
+ - offset: 0x00000000
+ type: X86_64_RELOC_UNSIGNED
+ length: 3
+ pc-rel: false
+ extern: true
+ symbol: 2
+local-symbols:
+ - name: Lstr1
+ type: N_SECT
+ sect: 1
+ value: 0x0000000000000000
+ - name: Lstr2
+ type: N_SECT
+ sect: 1
+ value: 0x0000000000000006
+undefined-symbols:
+ - name: ___CFConstantStringClassReference
+ type: N_UNDF
+ scope: [ N_EXT ]
+ value: 0x0000000000000000
+...
+
+# CHECK: defined-atoms:
+# CHECK: - scope: hidden
+# CHECK: type: c-string
+# CHECK: content: [ 68, 65, 6C, 6C, 6F, 00 ]
+# CHECK: merge: by-content
+# CHECK: - scope: hidden
+# CHECK: type: c-string
+# CHECK: content: [ 74, 68, 65, 72, 65, 00 ]
+# CHECK: merge: by-content
+# CHECK: - scope: hidden
+# CHECK: type: cfstring
+# CHECK: merge: by-content
+# CHECK: - scope: hidden
+# CHECK: type: cfstring
+# CHECK: merge: by-content
+# CHECK:undefined-atoms:
+# CHECK: - name: ___CFConstantStringClassReference
More information about the llvm-commits
mailing list