[llvm] [dsymutil] Avoid copying binary swiftmodules built from textual (PR #134719)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 7 12:58:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-debuginfo
Author: Adrian Prantl (adrian-prantl)
<details>
<summary>Changes</summary>
Avoid copying binary swiftmodules built from textual .swiftinterface files into the dSYM bundle. These typically come only from the SDK (since textual interfaces require library evolution), so copying them into the bundle is a waste of space.
Whether a swiftmodule was built from a textual interface is determined by parsing the module's control block, which means duplicating 5 constants from the Swift frontend. If a file cannot be parsed, dsymutil errs on the side of copying the file anyway.
rdar://138186524
---
Full diff: https://github.com/llvm/llvm-project/pull/134719.diff
10 Files Affected:
- (added) llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule ()
- (added) llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule ()
- (added) llvm/test/tools/dsymutil/swiftmodule.test (+29)
- (modified) llvm/test/tools/dsymutil/yaml-object-address-rewrite.test (+3)
- (modified) llvm/tools/dsymutil/CMakeLists.txt (+1)
- (modified) llvm/tools/dsymutil/DebugMap.cpp (+7-5)
- (modified) llvm/tools/dsymutil/DwarfLinkerForBinary.cpp (+16)
- (modified) llvm/tools/dsymutil/RelocationMap.h (+1)
- (added) llvm/tools/dsymutil/SwiftModule.cpp (+203)
- (added) llvm/tools/dsymutil/SwiftModule.h (+15)
``````````diff
diff --git a/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule b/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule
new file mode 100644
index 0000000000000..7ba817b22b707
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/Binary.swiftmodule differ
diff --git a/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule b/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule
new file mode 100644
index 0000000000000..2873ee93e137a
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/FromInterface.swiftmodule differ
diff --git a/llvm/test/tools/dsymutil/swiftmodule.test b/llvm/test/tools/dsymutil/swiftmodule.test
new file mode 100644
index 0000000000000..05d4021886c2f
--- /dev/null
+++ b/llvm/test/tools/dsymutil/swiftmodule.test
@@ -0,0 +1,29 @@
+# RUN: dsymutil -verbose -oso-prepend-path=%p -y -o %t.dSYM %s | FileCheck %s
+#
+# RUN: dsymutil --linker parallel -verbose -oso-prepend-path=%p -y %s -o %t-parallel.dSYM | FileCheck %s
+#
+# To regenerate:
+# echo ''>I.swift
+# echo ''>B.swift
+# echo 'import I'>main.swift
+# xcrun swiftc -emit-module-interface-path I.swiftinterface -enable-library-evolution I.swift
+# xcrun swiftc -emit-module-path B.swiftmodule B.swift -Xfrontend -no-serialize-debugging-options
+# xcrun swiftc -explicit-module-build main.swift -I. -module-cache-path cache -g -Xfrontend -no-serialize-debugging-options
+# output is "B.swiftmodule" and "cache/I*.swiftmodule"
+#
+# CHECK-NOT: Skipping compiled textual Swift interface: {{.*}}/Inputs/Binary.swiftmodule
+# CHECK: Skipping compiled textual Swift interface: {{.*}}/Inputs/FromInterface.swiftmodule
+
+#
+---
+triple: 'arm64-apple-darwin'
+objects:
+ - filename: '/Inputs/Binary.swiftmodule'
+ timestamp: 0
+ type: 50
+ symbols: []
+ - filename: '/Inputs/FromInterface.swiftmodule'
+ timestamp: 0
+ type: 50
+ symbols: []
+...
diff --git a/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test b/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
index dfa0f285c5ce5..74e8c1e7ae777 100644
--- a/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
+++ b/llvm/test/tools/dsymutil/yaml-object-address-rewrite.test
@@ -12,10 +12,12 @@
# CHECK-NEXT: objects:
# CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
# CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
# CHECK-NEXT: symbols:
# CHECK-NEXT: sym: _main, objAddr: 0x0, binAddr: 0x100000EA0, size: 0x24
# CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)'
# CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
# CHECK-NEXT: symbols:
# CHECK-DAG: sym: _foo, objAddr: 0x20, binAddr: 0x100000ED0, size: 0x50
# CHECK-DAG: sym: _private_int, objAddr: 0x560, binAddr: 0x100001004, size: 0x0
@@ -24,6 +26,7 @@
# CHECK-NOT: { sym:
# CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)'
# CHECK-NEXT: timestamp: 0
+# CHECK-NEXT: type: 102
# CHECK-NEXT: symbols:
# CHECK-DAG: sym: _val, binAddr: 0x100001008, size: 0x0
# CHECK-DAG: sym: _bar, objAddr: 0x20, binAddr: 0x100000F40, size: 0x50
diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt
index efe28bda68ebf..f88b1d0b20cef 100644
--- a/llvm/tools/dsymutil/CMakeLists.txt
+++ b/llvm/tools/dsymutil/CMakeLists.txt
@@ -32,6 +32,7 @@ add_llvm_tool(dsymutil
MachOUtils.cpp
Reproducer.cpp
RelocationMap.cpp
+ SwiftModule.cpp
DEPENDS
intrinsics_gen
diff --git a/llvm/tools/dsymutil/DebugMap.cpp b/llvm/tools/dsymutil/DebugMap.cpp
index b38d502dda7c9..1ce7e9b9ed201 100644
--- a/llvm/tools/dsymutil/DebugMap.cpp
+++ b/llvm/tools/dsymutil/DebugMap.cpp
@@ -50,8 +50,8 @@ bool DebugMapObject::addSymbol(StringRef Name,
return true;
}
- auto InsertResult = Symbols.insert(
- std::make_pair(Name, SymbolMapping(ObjectAddress, LinkedAddress, Size)));
+ auto InsertResult = Symbols.insert(std::make_pair(
+ Name, SymbolMapping(ObjectAddress, LinkedAddress, Size)));
if (ObjectAddress && InsertResult.second)
AddressToMapping[*ObjectAddress] = &*InsertResult.first;
@@ -161,12 +161,13 @@ namespace yaml {
// Normalize/Denormalize between YAML and a DebugMapObject.
struct MappingTraits<dsymutil::DebugMapObject>::YamlDMO {
- YamlDMO(IO &io) { Timestamp = 0; }
+ YamlDMO(IO &io) {}
YamlDMO(IO &io, dsymutil::DebugMapObject &Obj);
dsymutil::DebugMapObject denormalize(IO &IO);
std::string Filename;
- int64_t Timestamp;
+ int64_t Timestamp = 0;
+ uint8_t Type = MachO::N_OSO;
std::vector<dsymutil::DebugMapObject::YAMLSymbolMapping> Entries;
};
@@ -183,6 +184,7 @@ void MappingTraits<dsymutil::DebugMapObject>::mapping(
MappingNormalization<YamlDMO, dsymutil::DebugMapObject> Norm(io, DMO);
io.mapRequired("filename", Norm->Filename);
io.mapOptional("timestamp", Norm->Timestamp);
+ io.mapOptional("type", Norm->Type);
io.mapRequired("symbols", Norm->Entries);
}
@@ -236,6 +238,7 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::YamlDMO(
IO &io, dsymutil::DebugMapObject &Obj) {
Filename = Obj.Filename;
Timestamp = sys::toTimeT(Obj.getTimestamp());
+ Type = Obj.getType();
Entries.reserve(Obj.Symbols.size());
for (auto &Entry : Obj.Symbols)
Entries.push_back(
@@ -286,7 +289,6 @@ MappingTraits<dsymutil::DebugMapObject>::YamlDMO::denormalize(IO &IO) {
}
}
- uint8_t Type = MachO::N_OSO;
if (Path.ends_with(".dylib")) {
// FIXME: find a more resilient way
Type = MachO::N_LIB;
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index f6a35708dc076..c8fa4dbeffb9e 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -10,6 +10,7 @@
#include "BinaryHolder.h"
#include "DebugMap.h"
#include "MachOUtils.h"
+#include "SwiftModule.h"
#include "dsymutil.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -783,6 +784,21 @@ bool DwarfLinkerForBinary::linkImpl(
reportWarning("Could not open '" + File + "'");
continue;
}
+ auto FromInterfaceOrErr =
+ IsBuiltFromSwiftInterface((*ErrorOrMem)->getBuffer());
+ if (!FromInterfaceOrErr) {
+ reportWarning("Could not parse binary Swift module: " +
+ toString(FromInterfaceOrErr.takeError()),
+ Obj->getObjectFilename());
+ // Only skip swiftmodules that could be parsed and are
+ // positively identified as textual.
+ } else if (*FromInterfaceOrErr) {
+ if (Options.Verbose)
+ outs() << "Skipping compiled textual Swift interface: "
+ << Obj->getObjectFilename() << "\n";
+ continue;
+ }
+
sys::fs::file_status Stat;
if (auto Err = sys::fs::status(File, Stat)) {
reportWarning(Err.message());
diff --git a/llvm/tools/dsymutil/RelocationMap.h b/llvm/tools/dsymutil/RelocationMap.h
index 3d851acf2b892..5a804cd141c38 100644
--- a/llvm/tools/dsymutil/RelocationMap.h
+++ b/llvm/tools/dsymutil/RelocationMap.h
@@ -37,6 +37,7 @@ struct SymbolMapping {
std::optional<yaml::Hex64> ObjectAddress;
yaml::Hex64 BinaryAddress;
yaml::Hex32 Size;
+ yaml::Hex8 Type;
SymbolMapping(std::optional<uint64_t> ObjectAddr, uint64_t BinaryAddress,
uint32_t Size)
diff --git a/llvm/tools/dsymutil/SwiftModule.cpp b/llvm/tools/dsymutil/SwiftModule.cpp
new file mode 100644
index 0000000000000..ba7ee8f1c93ed
--- /dev/null
+++ b/llvm/tools/dsymutil/SwiftModule.cpp
@@ -0,0 +1,203 @@
+//===- tools/dsymutil/SwiftModule.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
+
+static const unsigned char SWIFTMODULE_SIGNATURE[] = {0xE2, 0x9C, 0xA8, 0x0E};
+static const uint16_t expectedMajorVersion = 0;
+static const unsigned MODULE_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID;
+static const unsigned CONTROL_BLOCK_ID =
+ llvm::bitc::FIRST_APPLICATION_BLOCKID + 1;
+static const unsigned METADATA = 1;
+static const unsigned OPTIONS_BLOCK_ID =
+ llvm::bitc::FIRST_APPLICATION_BLOCKID + 8;
+static const unsigned IS_BUILT_FROM_INTERFACE = 11;
+
+static llvm::Error
+checkModuleSignature(llvm::BitstreamCursor &cursor,
+ llvm::ArrayRef<unsigned char> signature) {
+ for (unsigned char byte : signature) {
+ if (cursor.AtEndOfStream())
+ return llvm::createStringError("malformed bitstream");
+ if (llvm::Expected<llvm::SimpleBitstreamCursor::word_t> maybeRead =
+ cursor.Read(8)) {
+ if (maybeRead.get() != byte)
+ return llvm::createStringError("malformed bitstream");
+ } else
+ return maybeRead.takeError();
+ }
+ return llvm::Error::success();
+}
+
+static llvm::Error enterTopLevelModuleBlock(llvm::BitstreamCursor &cursor,
+ unsigned ID,
+ bool shouldReadBlockInfo = true) {
+ llvm::Expected<llvm::BitstreamEntry> maybeNext = cursor.advance();
+ if (!maybeNext)
+ return maybeNext.takeError();
+ llvm::BitstreamEntry next = maybeNext.get();
+
+ if (next.Kind != llvm::BitstreamEntry::SubBlock)
+ return llvm::createStringError("malformed bitstream");
+
+ if (next.ID == llvm::bitc::BLOCKINFO_BLOCK_ID) {
+ if (shouldReadBlockInfo) {
+ if (!cursor.ReadBlockInfoBlock())
+ return llvm::createStringError("malformed bitstream");
+ } else {
+ if (cursor.SkipBlock())
+ return llvm::createStringError("malformed bitstream");
+ }
+ return enterTopLevelModuleBlock(cursor, ID, false);
+ }
+
+ if (next.ID != ID)
+ return llvm::createStringError("malformed bitstream");
+
+ if (llvm::Error Err = cursor.EnterSubBlock(ID))
+ return Err;
+
+ return llvm::Error::success();
+}
+
+static llvm::Expected<bool>
+readOptionsBlock(llvm::BitstreamCursor &cursor,
+ llvm::SmallVectorImpl<uint64_t> &scratch) {
+ bool is_built_from_interface = false;
+ while (!cursor.AtEndOfStream()) {
+ llvm::Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
+ if (!maybeEntry)
+ return maybeEntry.takeError();
+
+ llvm::BitstreamEntry entry = maybeEntry.get();
+ if (entry.Kind == llvm::BitstreamEntry::EndBlock)
+ break;
+
+ if (entry.Kind == llvm::BitstreamEntry::Error)
+ return llvm::createStringError("malformed bitstream");
+
+ if (entry.Kind == llvm::BitstreamEntry::SubBlock) {
+ if (cursor.SkipBlock())
+ return llvm::createStringError("malformed bitstream");
+ continue;
+ }
+
+ scratch.clear();
+ llvm::StringRef blobData;
+ llvm::Expected<unsigned> maybeKind =
+ cursor.readRecord(entry.ID, scratch, &blobData);
+ if (!maybeKind)
+ return maybeKind.takeError();
+ unsigned kind = maybeKind.get();
+ switch (kind) {
+ case IS_BUILT_FROM_INTERFACE:
+ is_built_from_interface = true;
+ continue;
+ default:
+ continue;
+ }
+ }
+ return is_built_from_interface;
+}
+
+static llvm::Expected<bool>
+parseControlBlock(llvm::BitstreamCursor &cursor,
+ llvm::SmallVectorImpl<uint64_t> &scratch) {
+ // The control block is malformed until we've at least read a major version
+ // number.
+ bool versionSeen = false;
+
+ while (!cursor.AtEndOfStream()) {
+ llvm::Expected<llvm::BitstreamEntry> maybeEntry = cursor.advance();
+ if (!maybeEntry) {
+ return maybeEntry.takeError();
+ }
+ llvm::BitstreamEntry entry = maybeEntry.get();
+ if (entry.Kind == llvm::BitstreamEntry::EndBlock)
+ break;
+
+ if (entry.Kind == llvm::BitstreamEntry::Error)
+ return llvm::createStringError("malformed bitstream");
+
+ if (entry.Kind == llvm::BitstreamEntry::SubBlock) {
+ if (entry.ID == OPTIONS_BLOCK_ID) {
+ if (llvm::Error Err = cursor.EnterSubBlock(OPTIONS_BLOCK_ID))
+ return Err;
+
+ return readOptionsBlock(cursor, scratch);
+ } else {
+ // Unknown metadata sub-block, possibly for use by a future version of
+ // the module format.
+ if (cursor.SkipBlock())
+ return llvm::createStringError("malformed bitstream");
+ }
+ continue;
+ }
+
+ scratch.clear();
+ llvm::StringRef blobData;
+ llvm::Expected<unsigned> maybeKind =
+ cursor.readRecord(entry.ID, scratch, &blobData);
+ if (!maybeKind)
+ return maybeKind.takeError();
+
+ unsigned kind = maybeKind.get();
+ switch (kind) {
+ case METADATA: {
+ if (versionSeen) {
+ return llvm::createStringError("multiple metadata blocks");
+ }
+
+ uint16_t versionMajor = scratch[0];
+ if (versionMajor != expectedMajorVersion)
+ return llvm::createStringError("unsupported module version");
+
+ versionSeen = true;
+ break;
+ }
+ default:
+ continue;
+ }
+ }
+ return llvm::createStringError("could not find control block");
+}
+
+llvm::Expected<bool> IsBuiltFromSwiftInterface(llvm::StringRef data) {
+ llvm::BitstreamCursor cursor(data);
+ if (llvm::Error Err = checkModuleSignature(cursor, SWIFTMODULE_SIGNATURE))
+ return llvm::joinErrors(
+ llvm::createStringError("could not check signature"), std::move(Err));
+ if (llvm::Error Err = enterTopLevelModuleBlock(cursor, MODULE_BLOCK_ID, false))
+ return llvm::joinErrors(
+ llvm::createStringError("could not enter top level block"),
+ std::move(Err));
+
+ llvm::BitstreamEntry topLevelEntry;
+ llvm::SmallVector<uint64_t, 32> scratch;
+
+ while (!cursor.AtEndOfStream()) {
+ llvm::Expected<llvm::BitstreamEntry> maybeEntry =
+ cursor.advance(llvm::BitstreamCursor::AF_DontPopBlockAtEnd);
+ if (!maybeEntry)
+ return maybeEntry.takeError();
+
+ topLevelEntry = maybeEntry.get();
+ if (topLevelEntry.Kind != llvm::BitstreamEntry::SubBlock)
+ break;
+
+ if (topLevelEntry.ID == CONTROL_BLOCK_ID) {
+ if (llvm::Error Err = cursor.EnterSubBlock(CONTROL_BLOCK_ID))
+ return Err;
+ return parseControlBlock(cursor, scratch);
+ }
+ }
+ return llvm::createStringError("no control block found");
+}
diff --git a/llvm/tools/dsymutil/SwiftModule.h b/llvm/tools/dsymutil/SwiftModule.h
new file mode 100644
index 0000000000000..599fe4530772f
--- /dev/null
+++ b/llvm/tools/dsymutil/SwiftModule.h
@@ -0,0 +1,15 @@
+//===- tools/dsymutil/SwiftModule.h ----------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TOOLS_DSYMUTIL_SWIFTMODULE_H
+#define LLVM_TOOLS_DSYMUTIL_SWIFTMODULE_H
+
+#include "llvm/Support/Error.h"
+
+llvm::Expected<bool> IsBuiltFromSwiftInterface(llvm::StringRef data);
+
+#endif
``````````
</details>
https://github.com/llvm/llvm-project/pull/134719
More information about the llvm-commits
mailing list