[lld] 3925ea4 - [lld-macho][nfci] Don't include null terminator in StringRefs

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 13 18:24:56 PDT 2022


Author: Jez Ng
Date: 2022-09-13T21:23:48-04:00
New Revision: 3925ea4172fa7f9024e5bae8815c01256eea887b

URL: https://github.com/llvm/llvm-project/commit/3925ea4172fa7f9024e5bae8815c01256eea887b
DIFF: https://github.com/llvm/llvm-project/commit/3925ea4172fa7f9024e5bae8815c01256eea887b.diff

LOG: [lld-macho][nfci] Don't include null terminator in StringRefs

So @keith observed
[here](https://reviews.llvm.org/D128108#inline-1263900) that the
StringRefs we were returning from `CStringInputSection::getStringRef()`
included the null terminator in their total length, but regular
StringRefs do not. Let's fix that so these StringRefs are less confusing
to use.

Reviewed By: #lld-macho, keith, Roger

Differential Revision: https://reviews.llvm.org/D133728

Added: 
    

Modified: 
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/MapFile.cpp
    lld/MachO/SyntheticSections.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 5c930729c9000..fb1b9291e30a5 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -242,9 +242,9 @@ void CStringInputSection::splitIntoPieces() {
     size_t end = s.find(0);
     if (end == StringRef::npos)
       fatal(getLocation(off) + ": string is not null terminated");
-    size_t size = end + 1;
-    uint32_t hash = deduplicateLiterals ? xxHash64(s.substr(0, size)) : 0;
+    uint32_t hash = deduplicateLiterals ? xxHash64(s.take_front(end)) : 0;
     pieces.emplace_back(off, hash);
+    size_t size = end + 1; // include null terminator
     s = s.substr(size);
     off += size;
   }

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 8946724e2d984..58e75b156b558 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -208,8 +208,10 @@ class CStringInputSection final : public InputSection {
   LLVM_ATTRIBUTE_ALWAYS_INLINE
   StringRef getStringRef(size_t i) const {
     size_t begin = pieces[i].inSecOff;
+    // The endpoint should be *at* the null terminator, not after. This matches
+    // the behavior of StringRef(const char *Str).
     size_t end =
-        (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff;
+        ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
     return toStringRef(data.slice(begin, end - begin));
   }
 

diff  --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 19eb25fe1eeb0..84c536e0464ef 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -88,10 +88,7 @@ getSymbolStrings(ArrayRef<Defined *> syms) {
           sym->value == piece.inSecOff &&
           "We expect symbols to always point to the start of a StringPiece.");
       StringRef str = isec->getStringRef(&piece - &(*isec->pieces.begin()));
-      assert(str.back() == '\000');
-      (os << "literal string: ")
-          // Remove null sequence at the end
-          .write_escaped(str.substr(0, str.size() - 1));
+      (os << "literal string: ").write_escaped(str);
       break;
     }
     case InputSection::ConcatKind:

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 6a639826c6175..78f379ea8625f 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -752,11 +752,7 @@ ObjCStubsSection::ObjCStubsSection()
 
 void ObjCStubsSection::addEntry(Symbol *sym) {
   assert(sym->getName().startswith(symbolPrefix) && "not an objc stub");
-  // Ensure our lookup string has the length of the actual string + the null
-  // terminator to mirror
-  StringRef methname =
-      StringRef(sym->getName().data() + symbolPrefix.size(),
-                sym->getName().size() - symbolPrefix.size() + 1);
+  StringRef methname = sym->getName().drop_front(symbolPrefix.size());
   offsets.push_back(
       in.objcMethnameSection->getStringOffset(methname).outSecOff);
   Defined *newSym = replaceSymbol<Defined>(
@@ -1572,7 +1568,7 @@ void CStringSection::finalizeContents() {
       isec->pieces[i].outSecOff = offset;
       isec->isFinal = true;
       StringRef string = isec->getStringRef(i);
-      offset += string.size();
+      offset += string.size() + 1; // account for null terminator
     }
   }
   size = offset;
@@ -1645,7 +1641,8 @@ void DeduplicatedCStringSection::finalizeContents() {
       StringOffset &offsetInfo = it->second;
       if (offsetInfo.outSecOff == UINT64_MAX) {
         offsetInfo.outSecOff = alignTo(size, 1ULL << offsetInfo.trailingZeros);
-        size = offsetInfo.outSecOff + s.size();
+        size =
+            offsetInfo.outSecOff + s.size() + 1; // account for null terminator
       }
       isec->pieces[i].outSecOff = offsetInfo.outSecOff;
     }


        


More information about the llvm-commits mailing list