[PATCH] D155535: [WebAssembly][Objcopy] Write output section headers identically to inputs

Derek Schuff via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 17 18:25:46 PDT 2023


dschuff updated this revision to Diff 541299.
dschuff added a comment.

rename test file


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D155535/new/

https://reviews.llvm.org/D155535

Files:
  llvm/include/llvm/Object/Wasm.h
  llvm/lib/ObjCopy/wasm/WasmObject.h
  llvm/lib/ObjCopy/wasm/WasmReader.cpp
  llvm/lib/ObjCopy/wasm/WasmWriter.cpp
  llvm/lib/Object/WasmObjectFile.cpp
  llvm/test/tools/llvm-objcopy/wasm/Inputs/sec-header-3bytelebs.wasm
  llvm/test/tools/llvm-objcopy/wasm/section-header-size.test


Index: llvm/test/tools/llvm-objcopy/wasm/section-header-size.test
===================================================================
--- /dev/null
+++ llvm/test/tools/llvm-objcopy/wasm/section-header-size.test
@@ -0,0 +1,7 @@
+## Test that objcopy generates section headers that are identical to those from
+## the input binary, including the encoded size of the LEB that represents the
+## section size. sec-header-3bytelebs.wasm has LEB encodings padded to 3 bytes, which
+## is unlikely to happen accidentally (since the smallest size is 1 byte,
+## and clang pads to 5)
+# RUN: llvm-objcopy %p/Inputs/sec-header-3bytelebs.wasm %t.wasm2
+# RUN: diff %p/Inputs/sec-header-3bytelebs.wasm %t.wasm2
Index: llvm/lib/Object/WasmObjectFile.cpp
===================================================================
--- llvm/lib/Object/WasmObjectFile.cpp
+++ llvm/lib/Object/WasmObjectFile.cpp
@@ -268,7 +268,11 @@
   Section.Offset = Ctx.Ptr - Ctx.Start;
   Section.Type = readUint8(Ctx);
   LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
+  // When reading the section's size, store the size of the LEB used to encode
+  // it. This allows objcopy/strip to reproduce the binary identically.
+  const uint8_t *PreSizePtr = Ctx.Ptr;
   uint32_t Size = readVaruint32(Ctx);
+  Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr;
   if (Size == 0)
     return make_error<StringError>("zero length section",
                                    object_error::parse_failed);
Index: llvm/lib/ObjCopy/wasm/WasmWriter.cpp
===================================================================
--- llvm/lib/ObjCopy/wasm/WasmWriter.cpp
+++ llvm/lib/ObjCopy/wasm/WasmWriter.cpp
@@ -29,16 +29,19 @@
   SectionSize = S.Contents.size();
   if (HasName)
     SectionSize += getULEB128Size(S.Name.size()) + S.Name.size();
-  // Pad the LEB value out to 5 bytes to make it a predictable size, and
-  // match the behavior of clang.
-  encodeULEB128(SectionSize, OS, 5);
+  // If we read this section from an object file, use its original size for the
+  // padding of the LEB value to avoid changing the file size. Otherwise, pad
+  // out to 5 bytes to make it predictable, and match the behavior of clang.
+  unsigned HeaderSecSizeEncodingLen =
+      S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5;
+  encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen);
   if (HasName) {
     encodeULEB128(S.Name.size(), OS);
     OS << S.Name;
   }
   // Total section size is the content size plus 1 for the section type and
-  // 5 for the LEB-encoded size.
-  SectionSize = SectionSize + 1 + 5;
+  // the LEB-encoded size.
+  SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen;
   return Header;
 }
 
Index: llvm/lib/ObjCopy/wasm/WasmReader.cpp
===================================================================
--- llvm/lib/ObjCopy/wasm/WasmReader.cpp
+++ llvm/lib/ObjCopy/wasm/WasmReader.cpp
@@ -22,8 +22,8 @@
   Obj->Sections.reserve(WasmObj.getNumSections());
   for (const SectionRef &Sec : WasmObj.sections()) {
     const WasmSection &WS = WasmObj.getWasmSection(Sec);
-    Obj->Sections.push_back(
-        {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content});
+    Obj->Sections.push_back({static_cast<uint8_t>(WS.Type),
+                             WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content});
     // Give known sections standard names to allow them to be selected. (Custom
     // sections already have their names filled in by the parser).
     Section &ReaderSec = Obj->Sections.back();
Index: llvm/lib/ObjCopy/wasm/WasmObject.h
===================================================================
--- llvm/lib/ObjCopy/wasm/WasmObject.h
+++ llvm/lib/ObjCopy/wasm/WasmObject.h
@@ -23,6 +23,7 @@
   // For now, each section is only an opaque binary blob with no distinction
   // between custom and known sections.
   uint8_t SectionType;
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
   StringRef Name;
   ArrayRef<uint8_t> Contents;
 };
Index: llvm/include/llvm/Object/Wasm.h
===================================================================
--- llvm/include/llvm/Object/Wasm.h
+++ llvm/include/llvm/Object/Wasm.h
@@ -110,6 +110,7 @@
   uint32_t Comdat = UINT32_MAX; // From the "comdat info" section
   ArrayRef<uint8_t> Content; // Section content
   std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
 };
 
 struct WasmSegment {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155535.541299.patch
Type: text/x-patch
Size: 4458 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230718/9f8ac746/attachment-0001.bin>


More information about the llvm-commits mailing list