[llvm] r348185 - [WebAssembly] Enforce assembler emits to streamer in order.

Wouter van Oortmerssen via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 3 12:30:28 PST 2018


Author: aardappel
Date: Mon Dec  3 12:30:28 2018
New Revision: 348185

URL: http://llvm.org/viewvc/llvm-project?rev=348185&view=rev
Log:
[WebAssembly] Enforce assembler emits to streamer in order.

Summary:
The assembler processes directives and instructions in whatever order
they are in the file, then directly emits them to the streamer. This
could cause badly written (or generated) .s files to produce
incorrect binaries.

It now has state that tracks what it has most recently seen, to
enforce they are emitted in a given order that always produces
correct wasm binaries.

Also added a new test that compares obj2yaml output from llc (the
backend) to that going via .s and the assembler to ensure both paths
generate the same binaries.

The features this test covers could be extended.

Passes all wasm Lit tests.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=39557

Reviewers: sbc100, dschuff, aheejin

Subscribers: jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D55149

Added:
    llvm/trunk/test/MC/WebAssembly/assembler-binary.ll
Modified:
    llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp

Modified: llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp?rev=348185&r1=348184&r2=348185&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp Mon Dec  3 12:30:28 2018
@@ -136,6 +136,24 @@ class WebAssemblyAsmParser final : publi
   // Much like WebAssemblyAsmPrinter in the backend, we have to own these.
   std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
 
+  // Order of labels, directives and instructions in a .s file have no
+  // syntactical enforcement. This class is a callback from the actual parser,
+  // and yet we have to be feeding data to the streamer in a very particular
+  // order to ensure a correct binary encoding that matches the regular backend
+  // (the streamer does not enforce this). This "state machine" enum helps
+  // guarantee that correct order.
+  enum ParserState {
+    FileStart,
+    Label,
+    FunctionStart,
+    FunctionLocals,
+    Instructions,
+  } CurrentState = FileStart;
+
+  // We track this to see if a .functype following a label is the same,
+  // as this is how we recognize the start of a function.
+  MCSymbol *LastLabel = nullptr;
+
 public:
   WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
                        const MCInstrInfo &MII, const MCTargetOptions &Options)
@@ -334,6 +352,11 @@ public:
     return false;
   }
 
+  void onLabelParsed(MCSymbol *Symbol) override {
+    LastLabel = Symbol;
+    CurrentState = Label;
+  }
+
   // This function processes wasm-specific directives streamed to
   // WebAssemblyTargetStreamer, all others go to the generic parser
   // (see WasmAsmParser).
@@ -370,10 +393,19 @@ public:
       TOut.emitGlobalType(WasmSym);
       return Expect(AsmToken::EndOfStatement, "EOL");
     } else if (DirectiveID.getString() == ".functype") {
+      // This code has to send things to the streamer similar to
+      // WebAssemblyAsmPrinter::EmitFunctionBodyStart.
+      // TODO: would be good to factor this into a common function, but the
+      // assembler and backend really don't share any common code, and this code
+      // parses the locals seperately.
       auto SymName = ExpectIdent();
       if (SymName.empty()) return true;
       auto WasmSym = cast<MCSymbolWasm>(
                     TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
+      if (CurrentState == Label && WasmSym == LastLabel) {
+        // This .functype indicates a start of a function.
+        CurrentState = FunctionStart;
+      }
       auto Signature = make_unique<wasm::WasmSignature>();
       if (Expect(AsmToken::LParen, "(")) return true;
       if (ParseRegTypeList(Signature->Params)) return true;
@@ -386,11 +418,16 @@ public:
       addSignature(std::move(Signature));
       WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
       TOut.emitFunctionType(WasmSym);
+      // TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
       return Expect(AsmToken::EndOfStatement, "EOL");
     } else if (DirectiveID.getString() == ".local") {
+      if (CurrentState != FunctionStart)
+        return Error(".local directive should follow the start of a function",
+                     Lexer.getTok());
       SmallVector<wasm::ValType, 4> Locals;
       if (ParseRegTypeList(Locals)) return true;
       TOut.emitLocal(Locals);
+      CurrentState = FunctionLocals;
       return Expect(AsmToken::EndOfStatement, "EOL");
     }
     return true;  // We didn't process this directive.
@@ -405,6 +442,16 @@ public:
         MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
     switch (MatchResult) {
     case Match_Success: {
+      if (CurrentState == FunctionStart) {
+        // This is the first instruction in a function, but we haven't seen
+        // a .local directive yet. The streamer requires locals to be encoded
+        // as a prelude to the instructions, so emit an empty list of locals
+        // here.
+        auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
+            *Out.getTargetStreamer());
+        TOut.emitLocal(SmallVector<wasm::ValType, 0>());
+      }
+      CurrentState = Instructions;
       Out.EmitInstruction(Inst, getSTI());
       return false;
     }

Added: llvm/trunk/test/MC/WebAssembly/assembler-binary.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/WebAssembly/assembler-binary.ll?rev=348185&view=auto
==============================================================================
--- llvm/trunk/test/MC/WebAssembly/assembler-binary.ll (added)
+++ llvm/trunk/test/MC/WebAssembly/assembler-binary.ll Mon Dec  3 12:30:28 2018
@@ -0,0 +1,92 @@
+; RUN: llc -filetype=asm -asm-verbose=false %s -o %t.s
+; RUN: FileCheck -check-prefix=ASM -input-file %t.s %s
+; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=asm %t.s -o - | FileCheck -check-prefix=ASM %s
+; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s
+; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %t.s -o - | obj2yaml | FileCheck %s
+
+; This specifically tests that we can generate a binary from the assembler
+; that produces the same binary as the backend would.
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare void @bar()
+
+define void @foo(i32 %n) {
+entry:
+  call void @bar()
+  ret void
+}
+
+; Checking assembly is not the point of this test, but if something breaks
+; it is easier to spot it here than in the yaml output.
+
+; ASM:     	.text
+; ASM:      	.file	"assembler-binary.ll"
+; ASM:      	.globl	foo
+; ASM:      foo:
+; ASM-NEXT: 	.functype	foo (i32) -> ()
+; ASM-NEXT: 	call	bar at FUNCTION
+; ASM-NEXT: 	end_function
+; ASM-NEXT: .Lfunc_end0:
+; ASM-NEXT: 	.size	foo, .Lfunc_end0-foo
+; ASM:       	.functype	bar () -> ()
+
+
+; CHECK:      --- !WASM
+; CHECK-NEXT: FileHeader:
+; CHECK-NEXT:   Version:         0x00000001
+; CHECK-NEXT: Sections:
+; CHECK-NEXT:   - Type:            TYPE
+; CHECK-NEXT:     Signatures:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         ReturnType:      NORESULT
+; CHECK-NEXT:         ParamTypes:
+; CHECK-NEXT:           - I32
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         ReturnType:      NORESULT
+; CHECK-NEXT:         ParamTypes:      []
+; CHECK-NEXT:   - Type:            IMPORT
+; CHECK-NEXT:     Imports:
+; CHECK-NEXT:       - Module:          env
+; CHECK-NEXT:         Field:           __linear_memory
+; CHECK-NEXT:         Kind:            MEMORY
+; CHECK-NEXT:         Memory:
+; CHECK-NEXT:           Initial:         0x00000000
+; CHECK-NEXT:       - Module:          env
+; CHECK-NEXT:         Field:           __indirect_function_table
+; CHECK-NEXT:         Kind:            TABLE
+; CHECK-NEXT:         Table:
+; CHECK-NEXT:           ElemType:        ANYFUNC
+; CHECK-NEXT:           Limits:
+; CHECK-NEXT:             Initial:         0x00000000
+; CHECK-NEXT:       - Module:          env
+; CHECK-NEXT:         Field:           bar
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         SigIndex:        1
+; CHECK-NEXT:   - Type:            FUNCTION
+; CHECK-NEXT:     FunctionTypes:   [ 0 ]
+; CHECK-NEXT:   - Type:            CODE
+; CHECK-NEXT:     Relocations:
+; CHECK-NEXT:       - Type:            R_WEBASSEMBLY_FUNCTION_INDEX_LEB
+; CHECK-NEXT:         Index:           1
+; CHECK-NEXT:         Offset:          0x00000004
+; CHECK-NEXT:     Functions:
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Locals:          []
+; CHECK-NEXT:         Body:            1080808080000B
+; CHECK-NEXT:   - Type:            CUSTOM
+; CHECK-NEXT:     Name:            linking
+; CHECK-NEXT:     Version:         1
+; CHECK-NEXT:     SymbolTable:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Name:            foo
+; CHECK-NEXT:         Flags:           [  ]
+; CHECK-NEXT:         Function:        1
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Name:            bar
+; CHECK-NEXT:         Flags:           [ UNDEFINED ]
+; CHECK-NEXT:         Function:        0
+; CHECK-NEXT: ...




More information about the llvm-commits mailing list