[llvm] 7ee758d - [WebAssembly] MC: Fix for data aliases with offsets (getelementptr)

Sam Clegg via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 17 16:26:06 PDT 2020


Author: Sam Clegg
Date: 2020-06-17T16:25:50-07:00
New Revision: 7ee758d691b2afc2f130ba3a8f8507fb6415f1d9

URL: https://github.com/llvm/llvm-project/commit/7ee758d691b2afc2f130ba3a8f8507fb6415f1d9
DIFF: https://github.com/llvm/llvm-project/commit/7ee758d691b2afc2f130ba3a8f8507fb6415f1d9.diff

LOG: [WebAssembly] MC: Fix for data aliases with offsets (getelementptr)

For some reason we hadn't seen such cases in the wild which makes
me think that clang and rustc don't generate these.  In the bug which
reproduces it only occurs with LTO so my guess is that some LTO pass
is creating this alias + gep.

See: https://github.com/emscripten-core/emscripten/issues/8731

Differential Revision: https://reviews.llvm.org/D79462

Added: 
    llvm/test/CodeGen/WebAssembly/aliases.ll
    llvm/test/MC/WebAssembly/alias.s
    llvm/test/MC/WebAssembly/offset.s

Modified: 
    llvm/lib/MC/WasmObjectWriter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 460f431b8563..0916ce514519 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -337,7 +337,7 @@ class WasmObjectWriter : public MCObjectWriter {
   void writeDataCountSection();
   void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
                         ArrayRef<WasmFunction> Functions);
-  void writeDataSection();
+  void writeDataSection(const MCAsmLayout &Layout);
   void writeEventSection(ArrayRef<wasm::WasmEventType> Events);
   void writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals);
   void writeRelocSection(uint32_t SectionIndex, StringRef Name,
@@ -353,9 +353,10 @@ class WasmObjectWriter : public MCObjectWriter {
   updateCustomSectionRelocations(const SmallVector<WasmFunction, 4> &Functions,
                                  const MCAsmLayout &Layout);
 
-  uint64_t getProvisionalValue(const WasmRelocationEntry &RelEntry);
+  uint64_t getProvisionalValue(const WasmRelocationEntry &RelEntry,
+                               const MCAsmLayout &Layout);
   void applyRelocations(ArrayRef<WasmRelocationEntry> Relocations,
-                        uint64_t ContentsOffset);
+                        uint64_t ContentsOffset, const MCAsmLayout &Layout);
 
   uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry);
   uint32_t getFunctionType(const MCSymbolWasm &Symbol);
@@ -480,9 +481,9 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
 
   if (SymA->isVariable()) {
     const MCExpr *Expr = SymA->getVariableValue();
-    const auto *Inner = cast<MCSymbolRefExpr>(Expr);
-    if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
-      llvm_unreachable("weakref used in reloc not yet implemented");
+    if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr))
+      if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
+        llvm_unreachable("weakref used in reloc not yet implemented");
   }
 
   // Put any constant offset in an addend. Offsets can be negative, and
@@ -541,22 +542,13 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
   }
 }
 
-static const MCSymbolWasm *resolveSymbol(const MCSymbolWasm &Symbol) {
-  const MCSymbolWasm* Ret = &Symbol;
-  while (Ret->isVariable()) {
-    const MCExpr *Expr = Ret->getVariableValue();
-    auto *Inner = cast<MCSymbolRefExpr>(Expr);
-    Ret = cast<MCSymbolWasm>(&Inner->getSymbol());
-  }
-  return Ret;
-}
-
 // Compute a value to write into the code at the location covered
 // by RelEntry. This value isn't used by the static linker; it just serves
 // to make the object format more readable and more likely to be directly
 // useable.
 uint64_t
-WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
+WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry,
+                                      const MCAsmLayout &Layout) {
   if ((RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB ||
        RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_I32) &&
       !RelEntry.Symbol->isGlobal()) {
@@ -569,12 +561,13 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
   case wasm::R_WASM_TABLE_INDEX_SLEB:
   case wasm::R_WASM_TABLE_INDEX_I32: {
     // Provisional value is table address of the resolved symbol itself
-    const MCSymbolWasm *Sym = resolveSymbol(*RelEntry.Symbol);
-    assert(Sym->isFunction());
+    const MCSymbolWasm *Base =
+        cast<MCSymbolWasm>(Layout.getBaseSymbol(*RelEntry.Symbol));
+    assert(Base->isFunction());
     if (RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB)
-      return TableIndices[Sym] - InitialTableOffset;
+      return TableIndices[Base] - InitialTableOffset;
     else
-      return TableIndices[Sym];
+      return TableIndices[Base];
   }
   case wasm::R_WASM_TYPE_INDEX_LEB:
     // Provisional value is same as the index
@@ -601,11 +594,12 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) {
   case wasm::R_WASM_MEMORY_ADDR_I32:
   case wasm::R_WASM_MEMORY_ADDR_I64: {
     // Provisional value is address of the global
-    const MCSymbolWasm *Sym = resolveSymbol(*RelEntry.Symbol);
+    const MCSymbolWasm *Base =
+        cast<MCSymbolWasm>(Layout.getBaseSymbol(*RelEntry.Symbol));
     // For undefined symbols, use zero
-    if (!Sym->isDefined())
+    if (!Base->isDefined())
       return 0;
-    const wasm::WasmDataReference &Ref = DataLocations[Sym];
+    const wasm::WasmDataReference &Ref = DataLocations[Base];
     const WasmDataSegment &Segment = DataSegments[Ref.Segment];
     // Ignore overflow. LLVM allows address arithmetic to silently wrap.
     return Segment.Offset + Ref.Offset + RelEntry.Addend;
@@ -668,7 +662,8 @@ WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) {
 // Apply the portions of the relocation records that we can handle ourselves
 // directly.
 void WasmObjectWriter::applyRelocations(
-    ArrayRef<WasmRelocationEntry> Relocations, uint64_t ContentsOffset) {
+    ArrayRef<WasmRelocationEntry> Relocations, uint64_t ContentsOffset,
+    const MCAsmLayout &Layout) {
   auto &Stream = static_cast<raw_pwrite_stream &>(W.OS);
   for (const WasmRelocationEntry &RelEntry : Relocations) {
     uint64_t Offset = ContentsOffset +
@@ -676,7 +671,7 @@ void WasmObjectWriter::applyRelocations(
                       RelEntry.Offset;
 
     LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
-    auto Value = getProvisionalValue(RelEntry);
+    auto Value = getProvisionalValue(RelEntry, Layout);
 
     switch (RelEntry.Type) {
     case wasm::R_WASM_FUNCTION_INDEX_LEB:
@@ -921,12 +916,12 @@ void WasmObjectWriter::writeCodeSection(const MCAssembler &Asm,
   }
 
   // Apply fixups.
-  applyRelocations(CodeRelocations, Section.ContentsOffset);
+  applyRelocations(CodeRelocations, Section.ContentsOffset, Layout);
 
   endSection(Section);
 }
 
-void WasmObjectWriter::writeDataSection() {
+void WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) {
   if (DataSegments.empty())
     return;
 
@@ -951,7 +946,7 @@ void WasmObjectWriter::writeDataSection() {
   }
 
   // Apply fixups.
-  applyRelocations(DataRelocations, Section.ContentsOffset);
+  applyRelocations(DataRelocations, Section.ContentsOffset, Layout);
 
   endSection(Section);
 }
@@ -1104,7 +1099,7 @@ void WasmObjectWriter::writeCustomSection(WasmCustomSection &CustomSection,
 
   // Apply fixups.
   auto &Relocations = CustomSectionsRelocations[CustomSection.Section];
-  applyRelocations(Relocations, CustomSection.OutputContentsOffset);
+  applyRelocations(Relocations, CustomSection.OutputContentsOffset, Layout);
 }
 
 uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) {
@@ -1123,8 +1118,8 @@ void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) {
   assert(Symbol.isFunction());
 
   WasmSignature S;
-  const MCSymbolWasm *ResolvedSym = resolveSymbol(Symbol);
-  if (auto *Sig = ResolvedSym->getSignature()) {
+
+  if (auto *Sig = Symbol.getSignature()) {
     S.Returns = Sig->Returns;
     S.Params = Sig->Params;
   }
@@ -1221,8 +1216,10 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
 
     // Register types for all functions, including those with private linkage
     // (because wasm always needs a type signature).
-    if (WS.isFunction())
-      registerFunctionType(WS);
+    if (WS.isFunction()) {
+      const MCSymbolWasm *Base = cast<MCSymbolWasm>(Layout.getBaseSymbol(S));
+      registerFunctionType(*Base);
+    }
 
     if (WS.isEvent())
       registerEventType(WS);
@@ -1508,22 +1505,36 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
 
     assert(S.isDefined());
 
+    const MCSymbolWasm *Base = cast<MCSymbolWasm>(Layout.getBaseSymbol(S));
+
     // Find the target symbol of this weak alias and export that index
     const auto &WS = static_cast<const MCSymbolWasm &>(S);
-    const MCSymbolWasm *ResolvedSym = resolveSymbol(WS);
-    LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym
-                      << "'\n");
+    LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *Base << "'\n");
 
-    if (ResolvedSym->isFunction()) {
-      assert(WasmIndices.count(ResolvedSym) > 0);
-      uint32_t WasmIndex = WasmIndices.find(ResolvedSym)->second;
+    if (Base->isFunction()) {
+      assert(WasmIndices.count(Base) > 0);
+      uint32_t WasmIndex = WasmIndices.find(Base)->second;
       assert(WasmIndices.count(&WS) == 0);
       WasmIndices[&WS] = WasmIndex;
       LLVM_DEBUG(dbgs() << "  -> index:" << WasmIndex << "\n");
-    } else if (ResolvedSym->isData()) {
-      assert(DataLocations.count(ResolvedSym) > 0);
-      const wasm::WasmDataReference &Ref =
-          DataLocations.find(ResolvedSym)->second;
+    } else if (Base->isData()) {
+      auto &DataSection = static_cast<MCSectionWasm &>(WS.getSection());
+      uint64_t Offset = Layout.getSymbolOffset(S);
+      int64_t Size = 0;
+      // For data symbol alias we use the size of the base symbol as the
+      // size of the alias.  When an offset from the base is involved this
+      // can result in a offset + size goes past the end of the data section
+      // which out object format doesn't support.  So we must clamp it.
+      if (!Base->getSize()->evaluateAsAbsolute(Size, Layout))
+        report_fatal_error(".size expression must be evaluatable");
+      const WasmDataSegment &Segment =
+          DataSegments[DataSection.getSegmentIndex()];
+      Size =
+          std::min(static_cast<uint64_t>(Size), Segment.Data.size() - Offset);
+      wasm::WasmDataReference Ref = wasm::WasmDataReference{
+          DataSection.getSegmentIndex(),
+          static_cast<uint32_t>(Layout.getSymbolOffset(S)),
+          static_cast<uint32_t>(Size)};
       DataLocations[&WS] = Ref;
       LLVM_DEBUG(dbgs() << "  -> index:" << Ref.Segment << "\n");
     } else {
@@ -1585,14 +1596,15 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
           Rel.Type != wasm::R_WASM_TABLE_INDEX_REL_SLEB)
         return;
       assert(Rel.Symbol->isFunction());
-      const MCSymbolWasm &WS = *resolveSymbol(*Rel.Symbol);
-      uint32_t FunctionIndex = WasmIndices.find(&WS)->second;
+      const MCSymbolWasm *Base =
+          cast<MCSymbolWasm>(Layout.getBaseSymbol(*Rel.Symbol));
+      uint32_t FunctionIndex = WasmIndices.find(Base)->second;
       uint32_t TableIndex = TableElems.size() + InitialTableOffset;
-      if (TableIndices.try_emplace(&WS, TableIndex).second) {
-        LLVM_DEBUG(dbgs() << "  -> adding " << WS.getName()
+      if (TableIndices.try_emplace(Base, TableIndex).second) {
+        LLVM_DEBUG(dbgs() << "  -> adding " << Base->getName()
                           << " to table: " << TableIndex << "\n");
         TableElems.push_back(FunctionIndex);
-        registerFunctionType(WS);
+        registerFunctionType(*Base);
       }
     };
 
@@ -1681,7 +1693,7 @@ uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm,
   writeElemSection(TableElems);
   writeDataCountSection();
   writeCodeSection(Asm, Layout, Functions);
-  writeDataSection();
+  writeDataSection(Layout);
   for (auto &CustomSection : CustomSections)
     writeCustomSection(CustomSection, Asm, Layout);
   writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats);

diff  --git a/llvm/test/CodeGen/WebAssembly/aliases.ll b/llvm/test/CodeGen/WebAssembly/aliases.ll
new file mode 100644
index 000000000000..45eacc899fe2
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/aliases.ll
@@ -0,0 +1,71 @@
+; Based llvm/test/CodeGen/X86/aliases.ll
+; RUN: llc < %s -mtriple=wasm32-unknown-uknown -asm-verbose=false | FileCheck %s
+
+ at bar = global i32 42
+
+; CHECK-DAG: .globl	foo1
+; CHECK-DAG: .set foo1, bar
+ at foo1 = alias i32, i32* @bar
+
+; CHECK-DAG: .globl	foo2
+; CHECK-DAG: .set foo2, bar
+ at foo2 = alias i32, i32* @bar
+
+%FunTy = type i32()
+
+define i32 @foo_f() {
+  ret i32 0
+}
+
+; CHECK-DAG: .weak	bar_f
+; CHECK-DAG: .type	bar_f, at function
+; CHECK-DAG: .set bar_f, foo_f
+ at bar_f = weak alias %FunTy, %FunTy* @foo_f
+
+; CHECK-DAG: .weak	bar_l
+; CHECK-DAG: .set bar_l, bar
+ at bar_l = linkonce_odr alias i32, i32* @bar
+
+; CHECK-DAG: .set bar_i, bar
+ at bar_i = internal alias i32, i32* @bar
+
+; CHECK-DAG: .globl	A
+ at A = alias i64, bitcast (i32* @bar to i64*)
+
+; CHECK-DAG: .globl	bar_h
+; CHECK-DAG: .hidden	bar_h
+; CHECK-DAG: .set bar_h, bar
+ at bar_h = hidden alias i32, i32* @bar
+
+; CHECK-DAG: .globl	bar_p
+; CHECK-DAG: .protected	bar_p
+; CHECK-DAG: .set bar_p, bar
+ at bar_p = protected alias i32, i32* @bar
+
+; CHECK-DAG: .set test2, bar+4
+ at test2 = alias i32, getelementptr(i32, i32* @bar, i32 1)
+
+; CHECK-DAG: .set test3, 42
+ at test3 = alias i32, inttoptr(i32 42 to i32*)
+
+; CHECK-DAG: .set test4, bar
+ at test4 = alias i32, inttoptr(i64 ptrtoint (i32* @bar to i64) to i32*)
+
+; CHECK-DAG: .set test5, test2-bar
+ at test5 = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @test2 to i32),
+                                 i32 ptrtoint (i32* @bar to i32)) to i32*)
+
+; CHECK-DAG: .globl	test
+define i32 @test() {
+entry:
+   %tmp = load i32, i32* @foo1
+   %tmp1 = load i32, i32* @foo2
+   %tmp0 = load i32, i32* @bar_i
+   %tmp2 = call i32 @foo_f()
+   %tmp3 = add i32 %tmp, %tmp2
+   %tmp4 = call i32 @bar_f()
+   %tmp5 = add i32 %tmp3, %tmp4
+   %tmp6 = add i32 %tmp1, %tmp5
+   %tmp7 = add i32 %tmp6, %tmp0
+   ret i32 %tmp7
+}

diff  --git a/llvm/test/MC/WebAssembly/alias.s b/llvm/test/MC/WebAssembly/alias.s
new file mode 100644
index 000000000000..b0a75399b7ad
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/alias.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %s | llvm-objdump -t - | FileCheck %s
+
+.section    .data,"",@
+foo:
+  .int32 1
+  .size foo, 4
+sym_a:
+  .int32 2
+  .size sym_a, 4
+
+.set sym_b, sym_a
+
+# CHECK: 00000000 l     O DATA foo
+# CHECK: 00000004 l     O DATA sym_a
+# CHECK: 00000004 l     O DATA sym_b

diff  --git a/llvm/test/MC/WebAssembly/offset.s b/llvm/test/MC/WebAssembly/offset.s
new file mode 100644
index 000000000000..cf3f521dd366
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/offset.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %s | llvm-objdump -t - | FileCheck %s
+
+.section    .data,"",@
+foo:
+  .int32 0
+  .size foo, 4
+sym_a:
+  .int32 1
+  .int32 2
+  .size sym_a, 8
+
+.set sym_b, sym_a + 4
+
+# CHECK: 00000000 l     O DATA foo
+# CHECK: 00000004 l     O DATA sym_a
+# CHECK: 00000008 l     O DATA sym_b


        


More information about the llvm-commits mailing list