[llvm] 96f95c9 - [dsymutil] Avoid copying binary swiftmodules built from textual

Adrian Prantl via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 8 12:37:49 PDT 2025


Author: Adrian Prantl
Date: 2025-04-08T12:37:27-07:00
New Revision: 96f95c9d89d8a1784d3865fa941fb1c510f4e2d7

URL: https://github.com/llvm/llvm-project/commit/96f95c9d89d8a1784d3865fa941fb1c510f4e2d7
DIFF: https://github.com/llvm/llvm-project/commit/96f95c9d89d8a1784d3865fa941fb1c510f4e2d7.diff

LOG: [dsymutil] Avoid copying binary swiftmodules built from textual

.swiftinterface files into the dSYM bundle. These typically come only
from the SDK (since textual interfaces require library evolution) and
thus are a waste of space to copy into the bundle.

The information about this is being parsed out of the control block,
which means duplicating 5 constants from the Swift frontend. If a file
cannot be parsed, dsymutil errs on the side of copying the file
anyway.

rdar://138186524

Relanding with additional linker dependency and moving the test into
the right target subdirectory.

Added: 
    llvm/test/tools/dsymutil/ARM/swiftmodule.test
    llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule
    llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule
    llvm/tools/dsymutil/SwiftModule.cpp
    llvm/tools/dsymutil/SwiftModule.h

Modified: 
    llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
    llvm/tools/dsymutil/CMakeLists.txt
    llvm/tools/dsymutil/DebugMap.cpp
    llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
    llvm/tools/dsymutil/RelocationMap.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/dsymutil/ARM/swiftmodule.test b/llvm/test/tools/dsymutil/ARM/swiftmodule.test
new file mode 100644
index 0000000000000..347f2841655ef
--- /dev/null
+++ b/llvm/test/tools/dsymutil/ARM/swiftmodule.test
@@ -0,0 +1,29 @@
+# RUN: dsymutil -verbose -oso-prepend-path=%p -y -o %t.dSYM  %s | FileCheck %s
+#
+# RUN: dsymutil --linker parallel -verbose -oso-prepend-path=%p -y %s -o %t-parallel.dSYM | FileCheck %s
+#
+# To regenerate:
+# echo ''>I.swift
+# echo ''>B.swift
+# echo 'import I'>main.swift
+# xcrun swiftc -emit-module-interface-path I.swiftinterface -enable-library-evolution I.swift
+# xcrun swiftc -emit-module-path B.swiftmodule B.swift -Xfrontend -no-serialize-debugging-options
+# xcrun swiftc -explicit-module-build main.swift -I. -module-cache-path cache -g -Xfrontend  -no-serialize-debugging-options
+# output is "B.swiftmodule" and "cache/I*.swiftmodule"
+#
+# CHECK-NOT: Skipping compiled textual Swift interface: {{.*}}/Inputs/Binary.swiftmodule
+# CHECK: Skipping compiled textual Swift interface: {{.*}}/Inputs/FromInterface.swiftmodule
+
+#
+---
+triple:          'arm64-apple-darwin'
+objects:
+  - filename:        '../Inputs/Binary.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+  - filename:        '../Inputs/FromInterface.swiftmodule'
+    timestamp:       0
+    type:            50
+    symbols:         []
+...

diff  --git a/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule b/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule
new file mode 100644
index 0000000000000..7ba817b22b707
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule differ

diff  --git a/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule b/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule
new file mode 100644
index 0000000000000..2873ee93e137a
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule differ

diff  --git a/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test b/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
index dfa0f285c5ce5..74e8c1e7ae777 100644
--- a/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
+++ b/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
@@ -12,10 +12,12 @@
 # CHECK-NEXT: objects:
 # CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
 # CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
 # CHECK-NEXT: symbols:
 # CHECK-NEXT: sym: _main, objAddr: 0x0, binAddr: 0x100000EA0, size: 0x24
 # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)'
 # CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
 # CHECK-NEXT: symbols:
 # CHECK-DAG:   sym: _foo, objAddr: 0x20, binAddr: 0x100000ED0, size: 0x50
 # CHECK-DAG:   sym: _private_int, objAddr: 0x560, binAddr: 0x100001004, size: 0x0
@@ -24,6 +26,7 @@
 # CHECK-NOT: { sym:
 # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)'
 # CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
 # CHECK-NEXT: symbols:
 # CHECK-DAG:   sym: _val, binAddr: 0x100001008, size: 0x0
 # CHECK-DAG:   sym: _bar, objAddr: 0x20, binAddr: 0x100000F40, size: 0x50

diff  --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt
index efe28bda68ebf..dbe42935b1b10 100644
--- a/llvm/tools/dsymutil/CMakeLists.txt
+++ b/llvm/tools/dsymutil/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
   AllTargetsDescs
   AllTargetsInfos
   AsmPrinter
+  BitReader
   CodeGen
   CodeGenTypes
   DWARFLinker
@@ -32,6 +33,7 @@ add_llvm_tool(dsymutil
   MachOUtils.cpp
   Reproducer.cpp
   RelocationMap.cpp
+  SwiftModule.cpp
 
   DEPENDS
   intrinsics_gen

diff  --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp
index b38d502dda7c9..f1cd7e402f28d 100644
--- a/llvm/tools/dsymutil/DebugMap.cpp
+++ b/llvm/tools/dsymutil/DebugMap.cpp
@@ -161,12 +161,13 @@ namespace yaml {
 
 // Normalize/Denormalize between YAML and a DebugMapObject.
 struct MappingTraits<dsymutil::DebugMapObject>::YamlDMO {
-  YamlDMO(IO &io) { Timestamp = 0; }
+  YamlDMO(IO &io) {}
   YamlDMO(IO &io, dsymutil::DebugMapObject &Obj);
   dsymutil::DebugMapObject denormalize(IO &IO);
 
   std::string Filename;
-  int64_t Timestamp;
+  int64_t Timestamp = 0;
+  uint8_t Type = MachO::N_OSO;
   std::vector<dsymutil::DebugMapObject::YAMLSymbolMapping> Entries;
 };
 
@@ -183,6 +184,7 @@ void MappingTraits<dsymutil::DebugMapObject>::mapping(
   MappingNormalization<YamlDMO, dsymutil::DebugMapObject> Norm(io, DMO);
   io.mapRequired("filename", Norm->Filename);
   io.mapOptional("timestamp", Norm->Timestamp);
+  io.mapOptional("type", Norm->Type);
   io.mapRequired("symbols", Norm->Entries);
 }
 
@@ -236,6 +238,7 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::YamlDMO(
     IO &io, dsymutil::DebugMapObject &Obj) {
   Filename = Obj.Filename;
   Timestamp = sys::toTimeT(Obj.getTimestamp());
+  Type = Obj.getType();
   Entries.reserve(Obj.Symbols.size());
   for (auto &Entry : Obj.Symbols)
     Entries.push_back(
@@ -286,7 +289,6 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::denormalize(IO &IO) {
     }
   }
 
-  uint8_t Type = MachO::N_OSO;
   if (Path.ends_with(".dylib")) {
     // FIXME: find a more resilient way
     Type = MachO::N_LIB;

diff  --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index f6a35708dc076..c8fa4dbeffb9e 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -10,6 +10,7 @@
 #include "BinaryHolder.h"
 #include "DebugMap.h"
 #include "MachOUtils.h"
+#include "SwiftModule.h"
 #include "dsymutil.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -783,6 +784,21 @@ bool DwarfLinkerForBinary::linkImpl(
         reportWarning("Could not open '" + File + "'");
         continue;
       }
+      auto FromInterfaceOrErr =
+          IsBuiltFromSwiftInterface((*ErrorOrMem)->getBuffer());
+      if (!FromInterfaceOrErr) {
+        reportWarning("Could not parse binary Swift module: " +
+                          toString(FromInterfaceOrErr.takeError()),
+                      Obj->getObjectFilename());
+        // Only skip swiftmodules that could be parsed and are
+        // positively identified as textual.
+      } else if (*FromInterfaceOrErr) {
+        if (Options.Verbose)
+          outs() << "Skipping compiled textual Swift interface: "
+                 << Obj->getObjectFilename() << "\n";
+        continue;
+      }
+
       sys::fs::file_status Stat;
       if (auto Err = sys::fs::status(File, Stat)) {
         reportWarning(Err.message());

diff  --git a/llvm/tools/dsymutil/RelocationMap.h b/llvm/tools/dsymutil/RelocationMap.h
index 3d851acf2b892..5a804cd141c38 100644
--- a/llvm/tools/dsymutil/RelocationMap.h
+++ b/llvm/tools/dsymutil/RelocationMap.h
@@ -37,6 +37,7 @@ struct SymbolMapping {
   std::optional<yaml::Hex64> ObjectAddress;
   yaml::Hex64 BinaryAddress;
   yaml::Hex32 Size;
+  yaml::Hex8 Type;
 
   SymbolMapping(std::optional<uint64_t> ObjectAddr, uint64_t BinaryAddress,
                 uint32_t Size)

diff  --git a/llvm/tools/dsymutil/SwiftModule.cpp b/llvm/tools/dsymutil/SwiftModule.cpp
new file mode 100644
index 0000000000000..7b21f30237e4e
--- /dev/null
+++ b/llvm/tools/dsymutil/SwiftModule.cpp
@@ -0,0 +1,192 @@
+//===- tools/dsymutil/SwiftModule.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+
+namespace {
+// Copied from swift/lib/Serialization/ModuleFormat.h
+//
+// Bytes that must appear, in this order, at the very start of the buffer.
+constexpr unsigned char SWIFTMODULE_SIGNATURE[] = {0xE2, 0x9C, 0xA8, 0x0E};
+// The first field of the METADATA record must equal this value; any other
+// value is reported as an unsupported module version.
+constexpr uint16_t expectedMajorVersion = 0;
+// ID of the block that must be the first non-BLOCKINFO top-level entry.
+constexpr unsigned MODULE_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID;
+// ID of the sub-block of the module block that is searched for options.
+constexpr unsigned CONTROL_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID + 1;
+// Record kind inside the control block that carries the major version.
+constexpr unsigned METADATA = 1;
+// ID of the sub-block of the control block that holds the option records.
+constexpr unsigned OPTIONS_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID + 8;
+// Record kind inside the options block; its mere presence marks the module as
+// having been built from a textual interface.
+constexpr unsigned IS_BUILT_FROM_INTERFACE = 11;
+
+/// Read one byte from \p cursor for each byte of \p signature and compare.
+///
+/// \returns success when every byte matches; otherwise the read error, or a
+/// "malformed bitstream" error when the stream ends early or a byte differs.
+llvm::Error checkModuleSignature(llvm::BitstreamCursor &cursor,
+                                 llvm::ArrayRef<unsigned char> signature) {
+  for (size_t idx = 0, count = signature.size(); idx != count; ++idx) {
+    if (cursor.AtEndOfStream())
+      return llvm::createStringError("malformed bitstream");
+
+    // The signature is compared one 8-bit unit at a time.
+    auto nextByte = cursor.Read(8);
+    if (!nextByte)
+      return nextByte.takeError();
+
+    if (*nextByte != signature[idx])
+      return llvm::createStringError("malformed bitstream");
+  }
+  return llvm::Error::success();
+}
+
+/// Enter the top-level block \p ID, skipping any BLOCKINFO blocks that
+/// precede it.
+///
+/// \param cursor stream positioned at a top-level entry.
+/// \param ID     block ID that the first non-BLOCKINFO entry must have.
+/// \returns success with \p cursor inside the block; otherwise the underlying
+/// stream error, or "malformed bitstream" when the next entry is not a
+/// sub-block with the requested ID.
+llvm::Error enterTopLevelModuleBlock(llvm::BitstreamCursor &cursor,
+                                     unsigned ID) {
+  llvm::Expected<llvm::BitstreamEntry> maybeNext = cursor.advance();
+  if (!maybeNext)
+    return maybeNext.takeError();
+  llvm::BitstreamEntry next = maybeNext.get();
+
+  if (next.Kind != llvm::BitstreamEntry::SubBlock)
+    return llvm::createStringError("malformed bitstream");
+
+  if (next.ID == llvm::bitc::BLOCKINFO_BLOCK_ID) {
+    // SkipBlock() returns an llvm::Error. It has to be bound and propagated:
+    // testing the temporary directly would destroy a failure value without
+    // handling it, which aborts in builds with ABI-breaking checks enabled.
+    if (llvm::Error Err = cursor.SkipBlock())
+      return Err;
+    return enterTopLevelModuleBlock(cursor, ID);
+  }
+
+  if (next.ID != ID)
+    return llvm::createStringError("malformed bitstream");
+
+  return cursor.EnterSubBlock(ID);
+}
+
+/// Scan the options block that \p cursor has just entered.
+///
+/// \param cursor  stream positioned inside the options block.
+/// \param scratch reusable storage for the fields of each record.
+/// \returns true if an IS_BUILT_FROM_INTERFACE record was seen before the end
+/// of the block (or of the stream), false if not, or an error if the stream
+/// could not be read.
+llvm::Expected<bool>
+readOptionsBlock(llvm::BitstreamCursor &cursor,
+                 llvm::SmallVectorImpl<uint64_t> &scratch) {
+  bool is_built_from_interface = false;
+  while (!cursor.AtEndOfStream()) {
+    llvm::Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
+    if (!maybeEntry)
+      return maybeEntry.takeError();
+
+    llvm::BitstreamEntry entry = maybeEntry.get();
+    if (entry.Kind == llvm::BitstreamEntry::EndBlock)
+      break;
+
+    if (entry.Kind == llvm::BitstreamEntry::Error)
+      return llvm::createStringError("malformed bitstream");
+
+    if (entry.Kind == llvm::BitstreamEntry::SubBlock) {
+      // Nested blocks carry nothing of interest here. The Error returned by
+      // SkipBlock() must be bound and propagated; dropping a failure value
+      // unhandled aborts in builds with ABI-breaking checks enabled.
+      if (llvm::Error Err = cursor.SkipBlock())
+        return Err;
+      continue;
+    }
+
+    scratch.clear();
+    llvm::StringRef blobData;
+    llvm::Expected<unsigned> maybeKind =
+        cursor.readRecord(entry.ID, scratch, &blobData);
+    if (!maybeKind)
+      return maybeKind.takeError();
+
+    // Only the presence of the record matters; its fields are not inspected.
+    // Keep scanning so that a malformed tail is still reported as an error.
+    if (maybeKind.get() == IS_BUILT_FROM_INTERFACE)
+      is_built_from_interface = true;
+  }
+  return is_built_from_interface;
+}
+
+/// Scan the control block that \p cursor has just entered and, once the
+/// options sub-block is reached, report what it says.
+///
+/// \param cursor  stream positioned inside the control block.
+/// \param scratch reusable storage for the fields of each record.
+/// \returns the result of readOptionsBlock() for the first options block
+/// found, or an error if the stream is malformed, the METADATA record is
+/// empty, duplicated or has an unexpected major version, or the control block
+/// ends without an options block.
+llvm::Expected<bool>
+parseControlBlock(llvm::BitstreamCursor &cursor,
+                  llvm::SmallVectorImpl<uint64_t> &scratch) {
+  // Tracks whether a METADATA record has been read, so that a second one can
+  // be rejected.
+  bool versionSeen = false;
+
+  while (!cursor.AtEndOfStream()) {
+    llvm::Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
+    if (!maybeEntry)
+      return maybeEntry.takeError();
+
+    llvm::BitstreamEntry entry = maybeEntry.get();
+    if (entry.Kind == llvm::BitstreamEntry::EndBlock)
+      break;
+
+    if (entry.Kind == llvm::BitstreamEntry::Error)
+      return llvm::createStringError("malformed bitstream");
+
+    if (entry.Kind == llvm::BitstreamEntry::SubBlock) {
+      if (entry.ID == OPTIONS_BLOCK_ID) {
+        if (llvm::Error Err = cursor.EnterSubBlock(OPTIONS_BLOCK_ID))
+          return Err;
+
+        return readOptionsBlock(cursor, scratch);
+      }
+
+      // Unknown metadata sub-block, possibly for use by a future version of
+      // the module format. The Error returned by SkipBlock() must be bound
+      // and propagated; dropping a failure value unhandled aborts in builds
+      // with ABI-breaking checks enabled.
+      if (llvm::Error Err = cursor.SkipBlock())
+        return Err;
+      continue;
+    }
+
+    scratch.clear();
+    llvm::StringRef blobData;
+    llvm::Expected<unsigned> maybeKind =
+        cursor.readRecord(entry.ID, scratch, &blobData);
+    if (!maybeKind)
+      return maybeKind.takeError();
+
+    unsigned kind = maybeKind.get();
+    if (kind == METADATA) {
+      if (versionSeen)
+        return llvm::createStringError("multiple metadata blocks");
+
+      // A METADATA record without fields has no version to read; indexing
+      // scratch[0] would be out of bounds.
+      if (scratch.empty())
+        return llvm::createStringError("malformed bitstream");
+
+      uint16_t versionMajor = scratch[0];
+      if (versionMajor != expectedMajorVersion)
+        return llvm::createStringError("unsupported module version");
+
+      versionSeen = true;
+    }
+  }
+  // The control block was entered by the caller; what is missing at this
+  // point is the options sub-block.
+  return llvm::createStringError("could not find options block");
+}
+
+} // namespace
+
+/// Parse \p data as a binary swiftmodule and report whether its options block
+/// contains an IS_BUILT_FROM_INTERFACE record.
+///
+/// \param data the raw contents of the swiftmodule file.
+/// \returns true if the record is present, false if the options block was
+/// read without finding it, or an error if the signature does not match, the
+/// bitstream is malformed, or no control/options block is found.
+llvm::Expected<bool> IsBuiltFromSwiftInterface(llvm::StringRef data) {
+  llvm::BitstreamCursor cursor(data);
+  if (llvm::Error Err = checkModuleSignature(cursor, SWIFTMODULE_SIGNATURE))
+    return llvm::joinErrors(
+        llvm::createStringError("could not check signature"), std::move(Err));
+  if (llvm::Error Err = enterTopLevelModuleBlock(cursor, MODULE_BLOCK_ID))
+    return llvm::joinErrors(
+        llvm::createStringError("could not enter top level block"),
+        std::move(Err));
+
+  llvm::SmallVector<uint64_t, 32> scratch;
+
+  while (!cursor.AtEndOfStream()) {
+    llvm::Expected<llvm::BitstreamEntry> maybeEntry =
+        cursor.advance(llvm::BitstreamCursor::AF_DontPopBlockAtEnd);
+    if (!maybeEntry)
+      return maybeEntry.takeError();
+
+    llvm::BitstreamEntry topLevelEntry = maybeEntry.get();
+    if (topLevelEntry.Kind != llvm::BitstreamEntry::SubBlock)
+      break;
+
+    if (topLevelEntry.ID == CONTROL_BLOCK_ID) {
+      if (llvm::Error Err = cursor.EnterSubBlock(CONTROL_BLOCK_ID))
+        return Err;
+      return parseControlBlock(cursor, scratch);
+    }
+
+    // After advance() reports a sub-block, the cursor must either enter or
+    // skip it. Without this the next advance() would decode the block header
+    // as if it were an entry.
+    if (llvm::Error Err = cursor.SkipBlock())
+      return Err;
+  }
+  return llvm::createStringError("no control block found");
+}

diff  --git a/llvm/tools/dsymutil/SwiftModule.h b/llvm/tools/dsymutil/SwiftModule.h
new file mode 100644
index 0000000000000..9a272fd6fed36
--- /dev/null
+++ b/llvm/tools/dsymutil/SwiftModule.h
@@ -0,0 +1,15 @@
+//===- tools/dsymutil/SwiftModule.h ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TOOLS_DSYMUTIL_SWIFTMODULE_H
+#define LLVM_TOOLS_DSYMUTIL_SWIFTMODULE_H
+
+#include "llvm/Support/Error.h"
+
+/// Inspect the contents of a binary swiftmodule passed in \p data.
+///
+/// \returns true if the module is marked as built from a textual interface,
+/// false if it was parsed and carries no such mark, or an error if \p data
+/// could not be parsed as a swiftmodule.
+llvm::Expected<bool> IsBuiltFromSwiftInterface(llvm::StringRef data);
+
+#endif


        


More information about the llvm-commits mailing list