[llvm] r346047 - [WebAssembly] Parsing missing directives to produce valid .o

Wouter van Oortmerssen via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 2 15:04:33 PDT 2018


Author: aardappel
Date: Fri Nov  2 15:04:33 2018
New Revision: 346047

URL: http://llvm.org/viewvc/llvm-project?rev=346047&view=rev
Log:
[WebAssembly] Parsing missing directives to produce valid .o

Summary:
The assembler was able to assemble and then dump back to .s, but
was failing to parse certain directives necessary for valid .o
output:
- .type directives are now recognized to distinguish function symbols
  and others.
- .size is now parsed to provide function size.
- .globaltype (introduced in https://reviews.llvm.org/D54012) is now
  recognized to ensure symbols like __stack_pointer have a proper type
  set for both .s and .o output.

Also added tests for the above.

Reviewers: sbc100, dschuff

Subscribers: jgravelle-google, aheejin, dexonsmith, kristina, llvm-commits, sunfish

Differential Revision: https://reviews.llvm.org/D53842

Modified:
    llvm/trunk/lib/MC/MCWasmStreamer.cpp
    llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
    llvm/trunk/test/MC/WebAssembly/basic-assembly.s

Modified: llvm/trunk/lib/MC/MCWasmStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCWasmStreamer.cpp?rev=346047&r1=346046&r2=346047&view=diff
==============================================================================
--- llvm/trunk/lib/MC/MCWasmStreamer.cpp (original)
+++ llvm/trunk/lib/MC/MCWasmStreamer.cpp Fri Nov  2 15:04:33 2018
@@ -61,7 +61,7 @@ void MCWasmStreamer::EmitAssemblerFlag(M
 void MCWasmStreamer::ChangeSection(MCSection *Section,
                                    const MCExpr *Subsection) {
   MCAssembler &Asm = getAssembler();
-  auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
+  auto *SectionWasm = cast<MCSectionWasm>(Section);
   const MCSymbol *Grp = SectionWasm->getGroup();
   if (Grp)
     Asm.registerSymbol(*Grp);

Modified: llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp?rev=346047&r1=346046&r2=346047&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp Fri Nov  2 15:04:33 2018
@@ -25,6 +25,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/TargetRegistry.h"
 
@@ -131,14 +132,14 @@ struct WebAssemblyOperand : public MCPar
 class WebAssemblyAsmParser final : public MCTargetAsmParser {
   MCAsmParser &Parser;
   MCAsmLexer &Lexer;
-  MCSymbol *LastLabel;
+  MCSymbolWasm *LastSymbol;
 
 public:
-  WebAssemblyAsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
-                       const MCInstrInfo &mii, const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, sti, mii), Parser(Parser),
-        Lexer(Parser.getLexer()), LastLabel(nullptr) {
-    setAvailableFeatures(ComputeAvailableFeatures(sti.getFeatureBits()));
+  WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+                       const MCInstrInfo &MII, const MCTargetOptions &Options)
+      : MCTargetAsmParser(Options, STI, MII), Parser(Parser),
+        Lexer(Parser.getLexer()), LastSymbol(nullptr) {
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
   }
 
 #define GET_ASSEMBLER_HEADER
@@ -168,24 +169,26 @@ public:
     return false;
   }
 
-  MVT::SimpleValueType ParseRegType(const StringRef &RegType) {
+
+  std::pair<MVT::SimpleValueType, unsigned>
+  ParseRegType(const StringRef &RegType) {
     // Derive type from .param .local decls, or the instruction itself.
-    return StringSwitch<MVT::SimpleValueType>(RegType)
-        .Case("i32", MVT::i32)
-        .Case("i64", MVT::i64)
-        .Case("f32", MVT::f32)
-        .Case("f64", MVT::f64)
-        .Case("i8x16", MVT::v16i8)
-        .Case("i16x8", MVT::v8i16)
-        .Case("i32x4", MVT::v4i32)
-        .Case("i64x2", MVT::v2i64)
-        .Case("f32x4", MVT::v4f32)
-        .Case("f64x2", MVT::v2f64)
+    return StringSwitch<std::pair<MVT::SimpleValueType, unsigned>>(RegType)
+        .Case("i32", {MVT::i32, wasm::WASM_TYPE_I32})
+        .Case("i64", {MVT::i64, wasm::WASM_TYPE_I64})
+        .Case("f32", {MVT::f32, wasm::WASM_TYPE_F32})
+        .Case("f64", {MVT::f64, wasm::WASM_TYPE_F64})
+        .Case("i8x16", {MVT::v16i8, wasm::WASM_TYPE_V128})
+        .Case("i16x8", {MVT::v8i16, wasm::WASM_TYPE_V128})
+        .Case("i32x4", {MVT::v4i32, wasm::WASM_TYPE_V128})
+        .Case("i64x2", {MVT::v2i64, wasm::WASM_TYPE_V128})
+        .Case("f32x4", {MVT::v4f32, wasm::WASM_TYPE_V128})
+        .Case("f64x2", {MVT::v2f64, wasm::WASM_TYPE_V128})
         // arbitrarily chosen vector type to associate with "v128"
         // FIXME: should these be EVTs to avoid this arbitrary hack? Do we want
         // to accept more specific SIMD register types?
-        .Case("v128", MVT::v16i8)
-        .Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
+        .Case("v128", {MVT::v16i8, wasm::WASM_TYPE_V128})
+        .Default({MVT::INVALID_SIMPLE_VALUE_TYPE, wasm::WASM_TYPE_NORESULT});
   }
 
   void ParseSingleInteger(bool IsNegative, OperandVector &Operands) {
@@ -311,24 +314,84 @@ public:
     return false;
   }
 
-  void onLabelParsed(MCSymbol *Symbol) override { LastLabel = Symbol; }
+  void onLabelParsed(MCSymbol *Symbol) override {
+    LastSymbol = cast<MCSymbolWasm>(Symbol);
+  }
 
   bool ParseDirective(AsmToken DirectiveID) override {
+    // This function has a really weird return value behavior that is different
+    // from all the other parsing functions:
+    // - return true && no tokens consumed -> don't know this directive / let
+    //   the generic parser handle it.
+    // - return true && tokens consumed -> a parsing error occurred.
+    // - return false -> processed this directive successfully.
     assert(DirectiveID.getKind() == AsmToken::Identifier);
     auto &Out = getStreamer();
     auto &TOut =
         reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
-    // TODO: we're just parsing the subset of directives we're interested in,
-    // and ignoring ones we don't recognise. We should ideally verify
-    // all directives here.
+    // TODO: any time we return an error, at least one token must have been
+    // consumed, otherwise this will not signal an error to the caller.
     if (DirectiveID.getString() == ".type") {
       // This could be the start of a function, check if followed by
       // "label, at function"
-      if (!(IsNext(AsmToken::Identifier) && IsNext(AsmToken::Comma) &&
-            IsNext(AsmToken::At) && Lexer.is(AsmToken::Identifier)))
+      if (!Lexer.is(AsmToken::Identifier))
+        return Error("Expected label after .type directive, got: ",
+                     Lexer.getTok());
+      auto WasmSym = cast<MCSymbolWasm>(
+                       TOut.getStreamer().getContext().getOrCreateSymbol(
+                         Lexer.getTok().getString()));
+      Parser.Lex();
+      if (!(IsNext(AsmToken::Comma) && IsNext(AsmToken::At) &&
+            Lexer.is(AsmToken::Identifier)))
         return Error("Expected label, at type declaration, got: ", Lexer.getTok());
+      auto TypeName = Lexer.getTok().getString();
+      if (TypeName == "function")
+        WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
+      else if (TypeName == "global")
+        WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+      else
+        return Error("Unknown WASM symbol type: ", Lexer.getTok());
+      Parser.Lex();
+      return Expect(AsmToken::EndOfStatement, "EOL");
+    } else if (DirectiveID.getString() == ".size") {
+      if (!Lexer.is(AsmToken::Identifier))
+        return Error("Expected label after .size directive, got: ",
+                     Lexer.getTok());
+      auto WasmSym = cast<MCSymbolWasm>(
+                       TOut.getStreamer().getContext().getOrCreateSymbol(
+                         Lexer.getTok().getString()));
       Parser.Lex();
-      // Out.EmitSymbolAttribute(??, MCSA_ELF_TypeFunction);
+      if (!IsNext(AsmToken::Comma))
+        return Error("Expected `,`, got: ", Lexer.getTok());
+      const MCExpr *Exp;
+      if (Parser.parseExpression(Exp))
+        return Error("Cannot parse .size expression: ", Lexer.getTok());
+      WasmSym->setSize(Exp);
+      return Expect(AsmToken::EndOfStatement, "EOL");
+    } else if (DirectiveID.getString() == ".globaltype") {
+      if (!Lexer.is(AsmToken::Identifier))
+        return Error("Expected symbol name after .globaltype directive, got: ",
+                     Lexer.getTok());
+      auto Name = Lexer.getTok().getString();
+      Parser.Lex();
+      if (!IsNext(AsmToken::Comma))
+        return Error("Expected `,`, got: ", Lexer.getTok());
+      if (!Lexer.is(AsmToken::Identifier))
+        return Error("Expected type in .globaltype directive, got: ",
+                     Lexer.getTok());
+      auto Type = ParseRegType(Lexer.getTok().getString()).second;
+      if (Type == wasm::WASM_TYPE_NORESULT)
+        return Error("Unknown type in .globaltype directive: ",
+                     Lexer.getTok());
+      Parser.Lex();
+      // Now set this symbol with the correct type.
+      auto WasmSym = cast<MCSymbolWasm>(
+                       TOut.getStreamer().getContext().getOrCreateSymbol(Name));
+      WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+      WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), true});
+      // And emit the directive again.
+      TOut.emitGlobalType(WasmSym);
+      return Expect(AsmToken::EndOfStatement, "EOL");
     } else if (DirectiveID.getString() == ".param" ||
                DirectiveID.getString() == ".local") {
       // Track the number of locals, needed for correct virtual register
@@ -337,7 +400,7 @@ public:
       std::vector<MVT> Params;
       std::vector<MVT> Locals;
       while (Lexer.is(AsmToken::Identifier)) {
-        auto RegType = ParseRegType(Lexer.getTok().getString());
+        auto RegType = ParseRegType(Lexer.getTok().getString()).first;
         if (RegType == MVT::INVALID_SIMPLE_VALUE_TYPE)
           return true;
         if (DirectiveID.getString() == ".param") {
@@ -349,15 +412,20 @@ public:
         if (!IsNext(AsmToken::Comma))
           break;
       }
-      assert(LastLabel);
-      TOut.emitParam(LastLabel, Params);
+      assert(LastSymbol);
+      // TODO: LastSymbol isn't even used by emitParam, so could be removed.
+      TOut.emitParam(LastSymbol, Params);
       TOut.emitLocal(Locals);
+      return Expect(AsmToken::EndOfStatement, "EOL");
     } else {
-      // For now, ignore anydirective we don't recognize:
+      // TODO: remove.
       while (Lexer.isNot(AsmToken::EndOfStatement))
         Parser.Lex();
+      return Expect(AsmToken::EndOfStatement, "EOL");
     }
-    return Expect(AsmToken::EndOfStatement, "EOL");
+    // TODO: current ELF directive parsing is broken, fix this is a followup.
+    //return true;  // We didn't process this directive.
+    return false;
   }
 
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned & /*Opcode*/,

Modified: llvm/trunk/test/MC/WebAssembly/basic-assembly.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/WebAssembly/basic-assembly.s?rev=346047&r1=346046&r2=346047&view=diff
==============================================================================
--- llvm/trunk/test/MC/WebAssembly/basic-assembly.s (original)
+++ llvm/trunk/test/MC/WebAssembly/basic-assembly.s Fri Nov  2 15:04:33 2018
@@ -1,4 +1,6 @@
 # RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s | FileCheck %s
+# this one is just here to see if it converts to .o without errors, but doesn't check any output:
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+simd128,+nontrapping-fptoint,+exception-handling < %s
 
     .text
     .type    test0, at function
@@ -56,7 +58,9 @@ test0:
     #i32.trunc_s:sat/f32
     get_global  __stack_pointer at GLOBAL
     end_function
-
+.Lfunc_end0:
+	.size	test0, .Lfunc_end0-test0
+    .globaltype	__stack_pointer, i32
 
 # CHECK:           .text
 # CHECK-LABEL: test0:
@@ -104,3 +108,5 @@ test0:
 # CHECK-NEXT:      end_try
 # CHECK-NEXT:      get_global  __stack_pointer at GLOBAL
 # CHECK-NEXT:      end_function
+
+# CHECK:           .globaltype	__stack_pointer, i32




More information about the llvm-commits mailing list