[lld] [llvm] [WebAssembly][Object] Support more elem segment flags (PR #123427)

Derek Schuff via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 17 16:13:31 PST 2025


https://github.com/dschuff created https://github.com/llvm/llvm-project/pull/123427

Some tools (e.g. Rust tooling) produce element segment descriptors with neither 
elemkind or element type descriptors, but with init exprs instead of func indices
(this is with the flags value of 4 in
https://webassembly.github.io/spec/core/binary/modules.html#element-section).
LLVM doesn't fully model reference types or the various ways to initialize element
segments, but we do want to correctly parse and skip over all type sections, so 
this change updates the object parser to handle that case, and refactors for more
clarity.

Also support parsing files that export imported (undefined) functions.

>From e0368613aae8bfaa4f59981ee7b4a362d7f8c790 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Wed, 15 Jan 2025 15:18:52 -0800
Subject: [PATCH 1/4] Support more elem segment flags

---
 llvm/include/llvm/BinaryFormat/Wasm.h |  8 +++++
 llvm/lib/Object/WasmObjectFile.cpp    | 43 ++++++++++++++++++---------
 2 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index 759e4321250912..66b9cb1cdc8359 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -415,6 +415,14 @@ struct WasmDataSegment {
   uint32_t Comdat; // from the "comdat info" section
 };
 
+// 3 different element segment modes are encodable. This class is currently
+// only used during decoding (see WasmElemSegment below).
+enum class ElemSegmentMode {
+  Active,
+  Passive,
+  Declarative
+};
+
 // Represents a Wasm element segment, with some limitations compared the spec:
 // 1) Does not model passive or declarative segments (Segment will end up with
 // an Offset field of i32.const 0)
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 2c9b878a4cde93..41499c98ccdc5f 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -1440,15 +1440,22 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
     Info.Flags = 0;
     switch (Ex.Kind) {
     case wasm::WASM_EXTERNAL_FUNCTION: {
-      if (!isDefinedFunctionIndex(Ex.Index))
+      if (!isValidFunctionIndex(Ex.Index))
         return make_error<GenericBinaryError>("invalid function export",
                                               object_error::parse_failed);
-      getDefinedFunction(Ex.Index).ExportName = Ex.Name;
       Info.Kind = wasm::WASM_SYMBOL_TYPE_FUNCTION;
       Info.ElementIndex = Ex.Index;
-      unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
-      wasm::WasmFunction &Function = Functions[FuncIndex];
-      Signature = &Signatures[Function.SigIndex];
+      if (isDefinedFunctionIndex(Ex.Index)) {
+        getDefinedFunction(Ex.Index).ExportName = Ex.Name;
+        unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
+        wasm::WasmFunction &Function = Functions[FuncIndex];
+        Signature = &Signatures[Function.SigIndex];
+        break;
+      } else {
+        // Function is imported. LLVM object files don't use this pattern and
+        // we still treat this as an undefined symbol, but we want to parse
+        // it.
+      }
       break;
     }
     case wasm::WASM_EXTERNAL_GLOBAL: {
@@ -1645,17 +1652,24 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
       return make_error<GenericBinaryError>(
           "Unsupported flags for element segment", object_error::parse_failed);
 
-    bool IsPassive = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) != 0;
-    bool IsDeclarative =
-        IsPassive && (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_DECLARATIVE);
+    wasm::ElemSegmentMode Mode;
+    if ((Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_PASSIVE) == 0) {
+      Mode = wasm::ElemSegmentMode::Active;
+    } else if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_IS_DECLARATIVE) {
+      Mode = wasm::ElemSegmentMode::Declarative;
+    } else {
+      Mode = wasm::ElemSegmentMode::Passive;
+    }
     bool HasTableNumber =
-        !IsPassive &&
+        Mode == wasm::ElemSegmentMode::Active &&
         (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER);
-    bool HasInitExprs =
-        (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
     bool HasElemKind =
         (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) &&
-        !HasInitExprs;
+        ~(Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
+    bool HasElemType = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) &&
+                       ~(Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND);
+    bool HasInitExprs =
+        (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
 
     if (HasTableNumber)
       Segment.TableNumber = readVaruint32(Ctx);
@@ -1666,7 +1680,7 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
       return make_error<GenericBinaryError>("invalid TableNumber",
                                             object_error::parse_failed);
 
-    if (IsPassive || IsDeclarative) {
+    if (Mode != wasm::ElemSegmentMode::Active) {
       Segment.Offset.Extended = false;
       Segment.Offset.Inst.Opcode = wasm::WASM_OPCODE_I32_CONST;
       Segment.Offset.Inst.Value.Int32 = 0;
@@ -1692,10 +1706,11 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
                                                 object_error::parse_failed);
         Segment.ElemKind = wasm::ValType::FUNCREF;
       }
-    } else if (HasInitExprs) {
+    } else if (HasElemType) {
       auto ElemType = parseValType(Ctx, readVaruint32(Ctx));
       Segment.ElemKind = ElemType;
     } else {
+      // Here the type is nullable (ref null func) but we don't distinguish.
       Segment.ElemKind = wasm::ValType::FUNCREF;
     }
 

>From 3930ba4c8971cf0b15a13bf94bcb8bd5415d5e58 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Fri, 17 Jan 2025 15:58:25 -0800
Subject: [PATCH 2/4] rename for clarity

---
 lld/wasm/SyntheticSections.cpp                |   2 +-
 llvm/include/llvm/BinaryFormat/Wasm.h         |   2 +-
 llvm/lib/MC/WasmObjectWriter.cpp              |   2 +-
 llvm/lib/Object/WasmObjectFile.cpp            |   8 ++++----
 llvm/lib/ObjectYAML/WasmEmitter.cpp           |   2 +-
 llvm/lib/ObjectYAML/WasmYAML.cpp              |   2 +-
 llvm/test/Object/Inputs/WASM/multi-table.wasm | Bin 185 -> 190 bytes
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index 715fba1ee6da54..7fb44b9f0c0091 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -594,7 +594,7 @@ void ElemSection::writeBody() {
   }
   writeInitExpr(os, initExpr);
 
-  if (flags & WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) {
+  if (flags & WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) {
     // We only write active function table initializers, for which the elem kind
     // is specified to be written as 0x00 and interpreted to mean "funcref".
     const uint8_t elemKind = 0;
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index 66b9cb1cdc8359..19097b458fb93b 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -170,7 +170,7 @@ enum : unsigned {
   WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER = 0x02, // if passive == 0
   WASM_ELEM_SEGMENT_HAS_INIT_EXPRS = 0x04,
 };
-const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND = 0x3;
+const unsigned WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC = 0x3;
 
 // Feature policy prefixes used in the custom "target_features" section
 enum : uint8_t {
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 29a8c53d350a40..8ddbe929e68b94 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1019,7 +1019,7 @@ void WasmObjectWriter::writeElemSection(
   encodeSLEB128(InitialTableOffset, W->OS);
   W->OS << char(wasm::WASM_OPCODE_END);
 
-  if (Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) {
+  if (Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) {
     // We only write active function table initializers, for which the elem kind
     // is specified to be written as 0x00 and interpreted to mean "funcref".
     const uint8_t ElemKind = 0;
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 41499c98ccdc5f..7951bd0a606c76 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -1664,10 +1664,10 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
         Mode == wasm::ElemSegmentMode::Active &&
         (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER);
     bool HasElemKind =
-        (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) &&
-        ~(Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
-    bool HasElemType = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS) &&
-                       ~(Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND);
+        (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) &&
+        !(Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
+    bool HasElemType = (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) &&
+                       (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
     bool HasInitExprs =
         (Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_INIT_EXPRS);
 
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 817d364694b43a..bd016764f58626 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -497,7 +497,7 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
 
     writeInitExpr(OS, Segment.Offset);
 
-    if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) {
+    if (Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC) {
       // We only support active function table initializers, for which the elem
       // kind is specified to be written as 0x00 and interpreted to mean
       // "funcref".
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 0636e19e05353d..6af66ba62be188 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -381,7 +381,7 @@ void MappingTraits<WasmYAML::ElemSegment>::mapping(
       Segment.Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER)
     IO.mapOptional("TableNumber", Segment.TableNumber);
   if (!IO.outputting() ||
-      Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND)
+      Segment.Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_DESC)
     IO.mapOptional("ElemKind", Segment.ElemKind);
   // TODO: Omit "offset" for passive segments? It's neither meaningful nor
   // encoded.
diff --git a/llvm/test/Object/Inputs/WASM/multi-table.wasm b/llvm/test/Object/Inputs/WASM/multi-table.wasm
index 47f5d8311cb74f76485577df85578b62f896361d..81e52a2d3e28658cfdb2a488bcd1bce7fea16575 100644
GIT binary patch
delta 35
ncmdnVxQ}sy6H^xVM3)pX7Dond1}=VPR)$MVQryf8j9?4^javnp

delta 30
icmdnTxRY^$6H_YJM3)pnE`DZKhD%IR+{_G&U<?3pss%p)


>From 256d901ebac58e07c8858aa52f52b4694f99f1e0 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Fri, 17 Jan 2025 16:01:26 -0800
Subject: [PATCH 3/4] clean up

---
 llvm/lib/Object/WasmObjectFile.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 7951bd0a606c76..871f3cf8d50d38 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -1450,12 +1450,10 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
         unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
         wasm::WasmFunction &Function = Functions[FuncIndex];
         Signature = &Signatures[Function.SigIndex];
-        break;
-      } else {
-        // Function is imported. LLVM object files don't use this pattern and
-        // we still treat this as an undefined symbol, but we want to parse
-        // it.
       }
+      // Else the function is imported. LLVM object files don't use this
+      // pattern and we still treat this as an undefined symbol, but we want to
+      // parse it without crashing.
       break;
     }
     case wasm::WASM_EXTERNAL_GLOBAL: {

>From a6f60fc120e856d805c5f99278c46cca81950a37 Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff at chromium.org>
Date: Fri, 17 Jan 2025 16:08:00 -0800
Subject: [PATCH 4/4] remove bad comment

---
 llvm/lib/Object/WasmObjectFile.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 871f3cf8d50d38..beed5774813484 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -1708,7 +1708,6 @@ Error WasmObjectFile::parseElemSection(ReadContext &Ctx) {
       auto ElemType = parseValType(Ctx, readVaruint32(Ctx));
       Segment.ElemKind = ElemType;
     } else {
-      // Here the type is nullable (ref null func) but we don't distinguish.
       Segment.ElemKind = wasm::ValType::FUNCREF;
     }
 



More information about the llvm-commits mailing list