[llvm-branch-commits] [llvm] 847c5b8 - [WebAssembly][Objcopy] Write output section headers identically to inputs

Tobias Hieta via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 16 23:25:33 PDT 2023


Author: Derek Schuff
Date: 2023-08-17T08:25:01+02:00
New Revision: 847c5b81ec1fa5015a7dd69ec13acb685bcd26df

URL: https://github.com/llvm/llvm-project/commit/847c5b81ec1fa5015a7dd69ec13acb685bcd26df
DIFF: https://github.com/llvm/llvm-project/commit/847c5b81ec1fa5015a7dd69ec13acb685bcd26df.diff

LOG: [WebAssembly][Objcopy] Write output section headers identically to inputs

Previously when objcopy generated section headers, it padded the LEB
that encodes the section size out to 5 bytes, matching the behavior of
clang. This is correct, but results in a binary that differs from the
input. This can sometimes have undesirable consequences (e.g. breaking
source maps).

This change makes the object reader remember the size of the LEB
encoding in the section header, so that llvm-objcopy can reproduce it
exactly. For sections not read from an object file (e.g. that
llvm-objcopy is adding itself), pad to 5 bytes.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D155535

(cherry picked from commit 1b21067cf247c62c2442daa7ee2d3915249d1ee2)

Added: 
    llvm/test/ObjectYAML/wasm/section_header_size.yaml
    llvm/test/tools/llvm-objcopy/wasm/section-header-size.test

Modified: 
    llvm/include/llvm/Object/Wasm.h
    llvm/include/llvm/ObjectYAML/WasmYAML.h
    llvm/lib/ObjCopy/wasm/WasmObject.h
    llvm/lib/ObjCopy/wasm/WasmReader.cpp
    llvm/lib/ObjCopy/wasm/WasmWriter.cpp
    llvm/lib/Object/WasmObjectFile.cpp
    llvm/lib/ObjectYAML/WasmEmitter.cpp
    llvm/lib/ObjectYAML/WasmYAML.cpp
    llvm/tools/obj2yaml/wasm2yaml.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h
index 8dd8918ddf21d5..dfab4c68d18f12 100644
--- a/llvm/include/llvm/Object/Wasm.h
+++ b/llvm/include/llvm/Object/Wasm.h
@@ -104,12 +104,14 @@ class WasmSymbol {
 struct WasmSection {
   WasmSection() = default;
 
-  uint32_t Type = 0;         // Section type (See below)
-  uint32_t Offset = 0;       // Offset with in the file
+  uint32_t Type = 0;
+  uint32_t Offset = 0;       // Offset within the file
   StringRef Name;            // Section name (User-defined sections only)
   uint32_t Comdat = UINT32_MAX; // From the "comdat info" section
-  ArrayRef<uint8_t> Content; // Section content
-  std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section
+  ArrayRef<uint8_t> Content;
+  std::vector<wasm::WasmRelocation> Relocations;
+  // Length of the LEB encoding of the section header's size field
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
 };
 
 struct WasmSegment {

diff  --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h
index 0f6c4f06665fb8..94ecc2fcfdb536 100644
--- a/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -189,6 +189,7 @@ struct Section {
 
   SectionType Type;
   std::vector<Relocation> Relocations;
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
 };
 
 struct CustomSection : Section {

diff  --git a/llvm/lib/ObjCopy/wasm/WasmObject.h b/llvm/lib/ObjCopy/wasm/WasmObject.h
index 9bc5831926c6c7..f860ec697e5684 100644
--- a/llvm/lib/ObjCopy/wasm/WasmObject.h
+++ b/llvm/lib/ObjCopy/wasm/WasmObject.h
@@ -23,6 +23,7 @@ struct Section {
   // For now, each section is only an opaque binary blob with no distinction
   // between custom and known sections.
   uint8_t SectionType;
+  std::optional<uint8_t> HeaderSecSizeEncodingLen;
   StringRef Name;
   ArrayRef<uint8_t> Contents;
 };

diff  --git a/llvm/lib/ObjCopy/wasm/WasmReader.cpp b/llvm/lib/ObjCopy/wasm/WasmReader.cpp
index 6e7d8b5591c928..578e78955af3e4 100644
--- a/llvm/lib/ObjCopy/wasm/WasmReader.cpp
+++ b/llvm/lib/ObjCopy/wasm/WasmReader.cpp
@@ -22,8 +22,8 @@ Expected<std::unique_ptr<Object>> Reader::create() const {
   Obj->Sections.reserve(WasmObj.getNumSections());
   for (const SectionRef &Sec : WasmObj.sections()) {
     const WasmSection &WS = WasmObj.getWasmSection(Sec);
-    Obj->Sections.push_back(
-        {static_cast<uint8_t>(WS.Type), WS.Name, WS.Content});
+    Obj->Sections.push_back({static_cast<uint8_t>(WS.Type),
+                             WS.HeaderSecSizeEncodingLen, WS.Name, WS.Content});
     // Give known sections standard names to allow them to be selected. (Custom
     // sections already have their names filled in by the parser).
     Section &ReaderSec = Obj->Sections.back();

diff  --git a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp
index fdcd441cc798cf..bfab25ce8097a5 100644
--- a/llvm/lib/ObjCopy/wasm/WasmWriter.cpp
+++ b/llvm/lib/ObjCopy/wasm/WasmWriter.cpp
@@ -29,16 +29,19 @@ Writer::SectionHeader Writer::createSectionHeader(const Section &S,
   SectionSize = S.Contents.size();
   if (HasName)
     SectionSize += getULEB128Size(S.Name.size()) + S.Name.size();
-  // Pad the LEB value out to 5 bytes to make it a predictable size, and
-  // match the behavior of clang.
-  encodeULEB128(SectionSize, OS, 5);
+  // If we read this section from an object file, use its original size for the
+  // padding of the LEB value to avoid changing the file size. Otherwise, pad
+  // out to 5 bytes to make it predictable, and match the behavior of clang.
+  unsigned HeaderSecSizeEncodingLen =
+      S.HeaderSecSizeEncodingLen ? *S.HeaderSecSizeEncodingLen : 5;
+  encodeULEB128(SectionSize, OS, HeaderSecSizeEncodingLen);
   if (HasName) {
     encodeULEB128(S.Name.size(), OS);
     OS << S.Name;
   }
   // Total section size is the content size plus 1 for the section type and
-  // 5 for the LEB-encoded size.
-  SectionSize = SectionSize + 1 + 5;
+  // the LEB-encoded size.
+  SectionSize = SectionSize + 1 + HeaderSecSizeEncodingLen;
   return Header;
 }
 

diff  --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index a72242bc4ac220..11b9b579a8d7cc 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -268,7 +268,11 @@ static Error readSection(WasmSection &Section, WasmObjectFile::ReadContext &Ctx,
   Section.Offset = Ctx.Ptr - Ctx.Start;
   Section.Type = readUint8(Ctx);
   LLVM_DEBUG(dbgs() << "readSection type=" << Section.Type << "\n");
+  // When reading the section's size, store the size of the LEB used to encode
+  // it. This allows objcopy/strip to reproduce the binary identically.
+  const uint8_t *PreSizePtr = Ctx.Ptr;
   uint32_t Size = readVaruint32(Ctx);
+  Section.HeaderSecSizeEncodingLen = Ctx.Ptr - PreSizePtr;
   if (Size == 0)
     return make_error<StringError>("zero length section",
                                    object_error::parse_failed);

diff  --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 6230312eff7b47..9b8fd11f85437e 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -646,8 +646,18 @@ bool WasmWriter::writeWasm(raw_ostream &OS) {
 
     StringStream.flush();
 
+    unsigned HeaderSecSizeEncodingLen =
+        Sec->HeaderSecSizeEncodingLen ? *Sec->HeaderSecSizeEncodingLen : 5;
+    unsigned RequiredLen = getULEB128Size(OutString.size());
+    // Wasm spec does not allow LEBs larger than 5 bytes
+    assert(RequiredLen <= 5);
+    if (HeaderSecSizeEncodingLen < RequiredLen) {
+      reportError("section header length can't be encoded in a LEB of size " +
+                  Twine(HeaderSecSizeEncodingLen));
+      return false;
+    }
     // Write the section size followed by the content
-    encodeULEB128(OutString.size(), OS);
+    encodeULEB128(OutString.size(), OS, HeaderSecSizeEncodingLen);
     OS << OutString;
   }
 

diff  --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 7ca422487df26d..ef47766a239420 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -45,6 +45,7 @@ void MappingTraits<WasmYAML::Object>::mapping(IO &IO,
 static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
   IO.mapRequired("Type", Section.Type);
   IO.mapOptional("Relocations", Section.Relocations);
+  IO.mapOptional("HeaderSecSizeEncodingLen", Section.HeaderSecSizeEncodingLen);
 }
 
 static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {

diff  --git a/llvm/test/ObjectYAML/wasm/section_header_size.yaml b/llvm/test/ObjectYAML/wasm/section_header_size.yaml
new file mode 100644
index 00000000000000..b632588d92e54d
--- /dev/null
+++ b/llvm/test/ObjectYAML/wasm/section_header_size.yaml
@@ -0,0 +1,91 @@
+## Test that obj2yaml output includes the section header size encoding length
+## only when the length isn't padded to 5 bytes.
+# RUN: yaml2obj --docnum=1 %s | obj2yaml | FileCheck %s
+
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    HeaderSecSizeEncodingLen:   3
+    Signatures:
+      - Index:           0
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+  - Type:            FUNCTION
+    HeaderSecSizeEncodingLen:  4
+    FunctionTypes:             [ 0 ]
+  - Type:            MEMORY
+    HeaderSecSizeEncodingLen:  1
+    Memories:
+      - Flags:           [ HAS_MAX ]
+        Minimum:         0x100
+        Maximum:         0x100
+  - Type:            EXPORT
+    HeaderSecSizeEncodingLen:  5
+    Exports:
+      - Name:            add
+        Kind:            FUNCTION
+        Index:           0
+  - Type:            CODE
+    HeaderSecSizeEncodingLen:  2
+    Functions:
+      - Index:           0
+        Locals:          []
+        Body:            200020016A0B
+...
+# CHECK: --- !WASM
+# CHECK-NEXT: FileHeader:
+# CHECK-NEXT:   Version:         0x1
+# CHECK-NEXT: Sections:
+# CHECK-NEXT:   - Type:            TYPE
+# CHECK-NEXT:     HeaderSecSizeEncodingLen:   3
+# CHECK-NEXT:     Signatures:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         ParamTypes:
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:         ReturnTypes:
+# CHECK-NEXT:           - I32
+# CHECK-NEXT:   - Type:            FUNCTION
+# CHECK-NEXT:     HeaderSecSizeEncodingLen:  4
+# CHECK-NEXT:     FunctionTypes:             [ 0 ]
+# CHECK-NEXT:   - Type:            MEMORY
+# CHECK-NEXT:     Memories:
+# CHECK-NEXT:       - Flags:           [ HAS_MAX ]
+# CHECK-NEXT:         Minimum:         0x100
+# CHECK-NEXT:         Maximum:         0x100
+# CHECK-NEXT:   - Type:            EXPORT
+# CHECK-NEXT:     Exports:
+# CHECK-NEXT:       - Name:            add
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         Index:           0
+# CHECK-NEXT:   - Type:            CODE
+# CHECK-NEXT:     HeaderSecSizeEncodingLen:  2
+# CHECK-NEXT:     Functions:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            200020016A0B
+
+## Test if we correctly error out if the provided section header size is less
+## than the size required.
+# RUN: not yaml2obj --docnum=2 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID
+# INVALID: yaml2obj: error: section header length can't be encoded in a LEB of size 0
+
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    HeaderSecSizeEncodingLen:   0
+    Signatures:
+      - Index:           0
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+...

diff  --git a/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test b/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test
new file mode 100644
index 00000000000000..4ea73f6668e354
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/wasm/section-header-size.test
@@ -0,0 +1,41 @@
+## Test that objcopy generates section headers that are identical to those from
+## the input binary, including the encoded size of the LEB that represents the
+## section size.
+
+# RUN: yaml2obj %s -o %t.wasm
+# RUN: llvm-objcopy %t.wasm %t.wasm.copy
+# RUN: 
diff  %t.wasm %t.wasm.copy
+
+--- !WASM
+FileHeader:
+  Version:         0x1
+Sections:
+  - Type:            TYPE
+    HeaderSecSizeEncodingLen:   3
+    Signatures:
+      - Index:           0
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+  - Type:            FUNCTION
+    HeaderSecSizeEncodingLen:  4
+    FunctionTypes:             [ 0 ]
+  - Type:            MEMORY
+    HeaderSecSizeEncodingLen:  1
+    Memories:
+      - Flags:           [ HAS_MAX ]
+        Minimum:         0x100
+        Maximum:         0x100
+  - Type:            EXPORT
+    HeaderSecSizeEncodingLen:  5
+    Exports:
+      - Name:            add
+        Kind:            FUNCTION
+        Index:           0
+  - Type:            CODE
+    Functions:
+      - Index:           0
+        Locals:          []
+        Body:            200020016A0B

diff  --git a/llvm/tools/obj2yaml/wasm2yaml.cpp b/llvm/tools/obj2yaml/wasm2yaml.cpp
index 813d03da58b12a..c450c02a05ec43 100644
--- a/llvm/tools/obj2yaml/wasm2yaml.cpp
+++ b/llvm/tools/obj2yaml/wasm2yaml.cpp
@@ -10,6 +10,7 @@
 #include "llvm/Object/COFF.h"
 #include "llvm/ObjectYAML/WasmYAML.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/YAMLTraits.h"
 
 using namespace llvm;
@@ -392,6 +393,17 @@ ErrorOr<WasmYAML::Object *> WasmDumper::dump() {
       llvm_unreachable("Unknown section type");
       break;
     }
+
+    // Only propagate the section size encoding length if it's not the minimal
+    // size or 5 (the default "padded" value). This is to avoid having every
+    // YAML output polluted with this value when we usually don't care about it
+    // (and avoid rewriting all the test expectations).
+    if (WasmSec.HeaderSecSizeEncodingLen &&
+        WasmSec.HeaderSecSizeEncodingLen !=
+            getULEB128Size(WasmSec.Content.size()) &&
+        WasmSec.HeaderSecSizeEncodingLen != 5)
+      S->HeaderSecSizeEncodingLen = WasmSec.HeaderSecSizeEncodingLen;
+
     for (const wasm::WasmRelocation &Reloc : WasmSec.Relocations) {
       WasmYAML::Relocation R;
       R.Type = Reloc.Type;


        


More information about the llvm-branch-commits mailing list