[llvm] [dsymutil] Avoid copying binary swiftmodules built from textual (PR #134719)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 7 12:58:01 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-debuginfo

Author: Adrian Prantl (adrian-prantl)

<details>
<summary>Changes</summary>

Avoid copying binary swiftmodules built from textual .swiftinterface files into the dSYM bundle. These typically come only from the SDK (since textual interfaces require library evolution) and thus are a waste of space to copy into the bundle.

Whether a swiftmodule was built from a textual interface is parsed out of the module's control block, which means duplicating 5 constants from the Swift frontend. If a file cannot be parsed, dsymutil errs on the side of copying the file anyway.

rdar://138186524

---
Full diff: https://github.com/llvm/llvm-project/pull/134719.diff


10 Files Affected:

- (added) llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule () 
- (added) llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule () 
- (added) llvm/test/tools/dsymutil/swiftmodule.test (+29) 
- (modified) llvm/test/tools/dsymutil/yaml-object-address-rewrite.test (+3) 
- (modified) llvm/tools/dsymutil/CMakeLists.txt (+1) 
- (modified) llvm/tools/dsymutil/DebugMap.cpp (+7-5) 
- (modified) llvm/tools/dsymutil/DwarfLinkerForBinary.cpp (+16) 
- (modified) llvm/tools/dsymutil/RelocationMap.h (+1) 
- (added) llvm/tools/dsymutil/SwiftModule.cpp (+203) 
- (added) llvm/tools/dsymutil/SwiftModule.h (+15) 


``````````diff
diff --git a/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule b/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule
new file mode 100644
index 0000000000000..7ba817b22b707
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule differ
diff --git a/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule b/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule
new file mode 100644
index 0000000000000..2873ee93e137a
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule differ
diff --git a/llvm/test/tools/dsymutil/swiftmodule.test b/llvm/test/tools/dsymutil/swiftmodule.test
new file mode 100644
index 0000000000000..05d4021886c2f
--- /dev/null
+++ b/llvm/test/tools/dsymutil/swiftmodule.test
@@ -0,0 +1,29 @@
+# RUN: dsymutil -verbose -oso-prepend-path=%p -y -o %t.dSYM  %s | FileCheck %s
+#
+# RUN: dsymutil --linker parallel -verbose -oso-prepend-path=%p -y %s -o %t-parallel.dSYM | FileCheck %s
+#
+# To regenerate:
+# echo ''>I.swift
+# echo ''>B.swift
+# echo 'import I'>main.swift
+# xcrun swiftc -emit-module-interface-path I.swiftinterface -enable-library-evolution I.swift
+# xcrun swiftc -emit-module-path B.swiftmodule B.swift -Xfrontend -no-serialize-debugging-options
+# xcrun swiftc -explicit-module-build main.swift -I. -module-cache-path cache -g -Xfrontend  -no-serialize-debugging-options
+# output is "B.swiftmodule" and "cache/I*.swiftmodule"
+#
+# CHECK-NOT: Skipping compiled textual Swift interface: {{.*}}/Inputs/Binary.swiftmodule
+# CHECK: Skipping compiled textual Swift interface: {{.*}}/Inputs/FromInterface.swiftmodule
+
+#
+---
+triple:          'arm64-apple-darwin'
+objects:
+  - filename:        '/Inputs/Binary.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+  - filename:        '/Inputs/FromInterface.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+...
diff --git a/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test b/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
index dfa0f285c5ce5..74e8c1e7ae777 100644
--- a/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
+++ b/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
@@ -12,10 +12,12 @@
 # CHECK-NEXT: objects:
 # CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
 # CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
 # CHECK-NEXT: symbols:
 # CHECK-NEXT: sym: _main, objAddr: 0x0, binAddr: 0x100000EA0, size: 0x24
 # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)'
 # CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
 # CHECK-NEXT: symbols:
 # CHECK-DAG:   sym: _foo, objAddr: 0x20, binAddr: 0x100000ED0, size: 0x50
 # CHECK-DAG:   sym: _private_int, objAddr: 0x560, binAddr: 0x100001004, size: 0x0
@@ -24,6 +26,7 @@
 # CHECK-NOT: { sym:
 # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)'
 # CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
 # CHECK-NEXT: symbols:
 # CHECK-DAG:   sym: _val, binAddr: 0x100001008, size: 0x0
 # CHECK-DAG:   sym: _bar, objAddr: 0x20, binAddr: 0x100000F40, size: 0x50
diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt
index efe28bda68ebf..f88b1d0b20cef 100644
--- a/llvm/tools/dsymutil/CMakeLists.txt
+++ b/llvm/tools/dsymutil/CMakeLists.txt
@@ -32,6 +32,7 @@ add_llvm_tool(dsymutil
   MachOUtils.cpp
   Reproducer.cpp
   RelocationMap.cpp
+  SwiftModule.cpp
 
   DEPENDS
   intrinsics_gen
diff --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp
index b38d502dda7c9..1ce7e9b9ed201 100644
--- a/llvm/tools/dsymutil/DebugMap.cpp
+++ b/llvm/tools/dsymutil/DebugMap.cpp
@@ -50,8 +50,8 @@ bool DebugMapObject::addSymbol(StringRef Name,
     return true;
   }
 
-  auto InsertResult = Symbols.insert(
-      std::make_pair(Name, SymbolMapping(ObjectAddress, LinkedAddress, Size)));
+  auto InsertResult = Symbols.insert(std::make_pair(
+      Name, SymbolMapping(ObjectAddress, LinkedAddress, Size)));
 
   if (ObjectAddress && InsertResult.second)
     AddressToMapping[*ObjectAddress] = &*InsertResult.first;
@@ -161,12 +161,13 @@ namespace yaml {
 
 // Normalize/Denormalize between YAML and a DebugMapObject.
 struct MappingTraits<dsymutil::DebugMapObject>::YamlDMO {
-  YamlDMO(IO &io) { Timestamp = 0; }
+  YamlDMO(IO &io) {}
   YamlDMO(IO &io, dsymutil::DebugMapObject &Obj);
   dsymutil::DebugMapObject denormalize(IO &IO);
 
   std::string Filename;
-  int64_t Timestamp;
+  int64_t Timestamp = 0;
+  uint8_t Type = MachO::N_OSO;
   std::vector<dsymutil::DebugMapObject::YAMLSymbolMapping> Entries;
 };
 
@@ -183,6 +184,7 @@ void MappingTraits<dsymutil::DebugMapObject>::mapping(
   MappingNormalization<YamlDMO, dsymutil::DebugMapObject> Norm(io, DMO);
   io.mapRequired("filename", Norm->Filename);
   io.mapOptional("timestamp", Norm->Timestamp);
+  io.mapOptional("type", Norm->Type);
   io.mapRequired("symbols", Norm->Entries);
 }
 
@@ -236,6 +238,7 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::YamlDMO(
     IO &io, dsymutil::DebugMapObject &Obj) {
   Filename = Obj.Filename;
   Timestamp = sys::toTimeT(Obj.getTimestamp());
+  Type = Obj.getType();
   Entries.reserve(Obj.Symbols.size());
   for (auto &Entry : Obj.Symbols)
     Entries.push_back(
@@ -286,7 +289,6 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::denormalize(IO &IO) {
     }
   }
 
-  uint8_t Type = MachO::N_OSO;
   if (Path.ends_with(".dylib")) {
     // FIXME: find a more resilient way
     Type = MachO::N_LIB;
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index f6a35708dc076..c8fa4dbeffb9e 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -10,6 +10,7 @@
 #include "BinaryHolder.h"
 #include "DebugMap.h"
 #include "MachOUtils.h"
+#include "SwiftModule.h"
 #include "dsymutil.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -783,6 +784,21 @@ bool DwarfLinkerForBinary::linkImpl(
         reportWarning("Could not open '" + File + "'");
         continue;
       }
+      auto FromInterfaceOrErr =
+          IsBuiltFromSwiftInterface((*ErrorOrMem)->getBuffer());
+      if (!FromInterfaceOrErr) {
+        reportWarning("Could not parse binary Swift module: " +
+                          toString(FromInterfaceOrErr.takeError()),
+                      Obj->getObjectFilename());
+        // Only skip swiftmodules that could be parsed and are
+        // positively identified as textual.
+      } else if (*FromInterfaceOrErr) {
+        if (Options.Verbose)
+          outs() << "Skipping compiled textual Swift interface: "
+                 << Obj->getObjectFilename() << "\n";
+        continue;
+      }
+
       sys::fs::file_status Stat;
       if (auto Err = sys::fs::status(File, Stat)) {
         reportWarning(Err.message());
diff --git a/llvm/tools/dsymutil/RelocationMap.h b/llvm/tools/dsymutil/RelocationMap.h
index 3d851acf2b892..5a804cd141c38 100644
--- a/llvm/tools/dsymutil/RelocationMap.h
+++ b/llvm/tools/dsymutil/RelocationMap.h
@@ -37,6 +37,7 @@ struct SymbolMapping {
   std::optional<yaml::Hex64> ObjectAddress;
   yaml::Hex64 BinaryAddress;
   yaml::Hex32 Size;
+  yaml::Hex8 Type;
 
   SymbolMapping(std::optional<uint64_t> ObjectAddr, uint64_t BinaryAddress,
                 uint32_t Size)
diff --git a/llvm/tools/dsymutil/SwiftModule.cpp b/llvm/tools/dsymutil/SwiftModule.cpp
new file mode 100644
index 0000000000000..ba7ee8f1c93ed
--- /dev/null
+++ b/llvm/tools/dsymutil/SwiftModule.cpp
@@ -0,0 +1,203 @@
+//===- tools/dsymutil/SwiftModule.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+
+static const unsigned char SWIFTMODULE_SIGNATURE[] = {0xE2, 0x9C, 0xA8, 0x0E};
+static const uint16_t expectedMajorVersion = 0;
+static const unsigned MODULE_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID;
+static const unsigned CONTROL_BLOCK_ID =
+    llvm::bitc::FIRST_APPLICATION_BLOCKID + 1;
+static const unsigned METADATA = 1;
+static const unsigned OPTIONS_BLOCK_ID =
+    llvm::bitc::FIRST_APPLICATION_BLOCKID + 8;
+static const unsigned IS_BUILT_FROM_INTERFACE = 11;
+
+static llvm::Error
+checkModuleSignature(llvm::BitstreamCursor &cursor,
+                     llvm::ArrayRef<unsigned char> signature) {
+  for (unsigned char byte : signature) {
+    if (cursor.AtEndOfStream())
+      return llvm::createStringError("malformed bitstream");
+    if (llvm::Expected<llvm::SimpleBitstreamCursor::word_t> maybeRead =
+            cursor.Read(8)) {
+      if (maybeRead.get() != byte)
+        return llvm::createStringError("malformed bitstream");
+    } else
+      return maybeRead.takeError();
+  }
+  return llvm::Error::success();
+}
+
+static llvm::Error enterTopLevelModuleBlock(llvm::BitstreamCursor &cursor,
+                                            unsigned ID,
+                                            bool shouldReadBlockInfo = true) {
+  llvm::Expected<llvm::BitstreamEntry> maybeNext = cursor.advance();
+  if (!maybeNext)
+    return maybeNext.takeError();
+  llvm::BitstreamEntry next = maybeNext.get();
+
+  if (next.Kind != llvm::BitstreamEntry::SubBlock)
+    return llvm::createStringError("malformed bitstream");
+
+  if (next.ID == llvm::bitc::BLOCKINFO_BLOCK_ID) {
+    if (shouldReadBlockInfo) {
+      if (!cursor.ReadBlockInfoBlock())
+        return llvm::createStringError("malformed bitstream");
+    } else {
+      if (cursor.SkipBlock())
+        return llvm::createStringError("malformed bitstream");
+    }
+    return enterTopLevelModuleBlock(cursor, ID, false);
+  }
+
+  if (next.ID != ID)
+    return llvm::createStringError("malformed bitstream");
+
+  if (llvm::Error Err = cursor.EnterSubBlock(ID))
+    return Err;
+
+  return llvm::Error::success();
+}
+
+static llvm::Expected<bool>
+readOptionsBlock(llvm::BitstreamCursor &cursor,
+                 llvm::SmallVectorImpl<uint64_t> &scratch) {
+  bool is_built_from_interface = false;
+  while (!cursor.AtEndOfStream()) {
+    llvm::Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
+    if (!maybeEntry)
+      return maybeEntry.takeError();
+
+    llvm::BitstreamEntry entry = maybeEntry.get();
+    if (entry.Kind == llvm::BitstreamEntry::EndBlock)
+      break;
+
+    if (entry.Kind == llvm::BitstreamEntry::Error)
+      return llvm::createStringError("malformed bitstream");
+
+    if (entry.Kind == llvm::BitstreamEntry::SubBlock) {
+      if (cursor.SkipBlock())
+        return llvm::createStringError("malformed bitstream");
+      continue;
+    }
+
+    scratch.clear();
+    llvm::StringRef blobData;
+    llvm::Expected<unsigned> maybeKind =
+        cursor.readRecord(entry.ID, scratch, &blobData);
+    if (!maybeKind)
+      return maybeKind.takeError();
+    unsigned kind = maybeKind.get();
+    switch (kind) {
+    case IS_BUILT_FROM_INTERFACE:
+      is_built_from_interface = true;
+      continue;
+    default:
+      continue;
+    }
+  }
+  return is_built_from_interface;
+}
+
+static llvm::Expected<bool>
+parseControlBlock(llvm::BitstreamCursor &cursor,
+                  llvm::SmallVectorImpl<uint64_t> &scratch) {
+  // The control block is malformed until we've at least read a major version
+  // number.
+  bool versionSeen = false;
+
+  while (!cursor.AtEndOfStream()) {
+    llvm::Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
+    if (!maybeEntry) {
+      return maybeEntry.takeError();
+    }
+    llvm::BitstreamEntry entry = maybeEntry.get();
+    if (entry.Kind == llvm::BitstreamEntry::EndBlock)
+      break;
+
+    if (entry.Kind == llvm::BitstreamEntry::Error)
+      return llvm::createStringError("malformed bitstream");
+
+    if (entry.Kind == llvm::BitstreamEntry::SubBlock) {
+      if (entry.ID == OPTIONS_BLOCK_ID) {
+        if (llvm::Error Err = cursor.EnterSubBlock(OPTIONS_BLOCK_ID))
+          return Err;
+
+        return readOptionsBlock(cursor, scratch);
+      } else {
+        // Unknown metadata sub-block, possibly for use by a future version of
+        // the module format.
+        if (cursor.SkipBlock())
+          return llvm::createStringError("malformed bitstream");
+      }
+      continue;
+    }
+
+    scratch.clear();
+    llvm::StringRef blobData;
+    llvm::Expected<unsigned> maybeKind =
+        cursor.readRecord(entry.ID, scratch, &blobData);
+    if (!maybeKind)
+      return maybeKind.takeError();
+
+    unsigned kind = maybeKind.get();
+    switch (kind) {
+    case METADATA: {
+      if (versionSeen) {
+        return llvm::createStringError("multiple metadata blocks");
+      }
+
+      uint16_t versionMajor = scratch[0];
+      if (versionMajor != expectedMajorVersion)
+        return llvm::createStringError("unsupported module version");
+
+      versionSeen = true;
+      break;
+    }
+    default:
+      continue;
+    }
+  }
+  return llvm::createStringError("could not find control block");
+}
+
+llvm::Expected<bool> IsBuiltFromSwiftInterface(llvm::StringRef data) {
+  llvm::BitstreamCursor cursor(data);
+  if (llvm::Error Err = checkModuleSignature(cursor, SWIFTMODULE_SIGNATURE))
+    return llvm::joinErrors(
+        llvm::createStringError("could not check signature"), std::move(Err));
+  if (llvm::Error Err = enterTopLevelModuleBlock(cursor, MODULE_BLOCK_ID, false))
+    return llvm::joinErrors(
+        llvm::createStringError("could not enter top level block"),
+        std::move(Err));
+
+  llvm::BitstreamEntry topLevelEntry;
+  llvm::SmallVector<uint64_t, 32> scratch;
+
+  while (!cursor.AtEndOfStream()) {
+    llvm::Expected<llvm::BitstreamEntry> maybeEntry =
+        cursor.advance(llvm::BitstreamCursor::AF_DontPopBlockAtEnd);
+    if (!maybeEntry)
+      return maybeEntry.takeError();
+
+    topLevelEntry = maybeEntry.get();
+    if (topLevelEntry.Kind != llvm::BitstreamEntry::SubBlock)
+      break;
+
+    if (topLevelEntry.ID == CONTROL_BLOCK_ID) {
+      if (llvm::Error Err = cursor.EnterSubBlock(CONTROL_BLOCK_ID))
+        return Err;
+      return parseControlBlock(cursor, scratch);
+    }
+  }
+  return llvm::createStringError("no control block found");
+}
diff --git a/llvm/tools/dsymutil/SwiftModule.h b/llvm/tools/dsymutil/SwiftModule.h
new file mode 100644
index 0000000000000..599fe4530772f
--- /dev/null
+++ b/llvm/tools/dsymutil/SwiftModule.h
@@ -0,0 +1,15 @@
+//===- tools/dsymutil/SwiftModule.h ----------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TOOLS_DSYMUTIL_SWIFTMODULE_H
+#define LLVM_TOOLS_DSYMUTIL_SWIFTMODULE_H
+
+#include "llvm/Support/Error.h"
+
+llvm::Expected<bool> IsBuiltFromSwiftInterface(llvm::StringRef data);
+
+#endif

``````````

</details>


https://github.com/llvm/llvm-project/pull/134719


More information about the llvm-commits mailing list