[llvm] c2e272f - [ms] [llvm-ml] Improve data support, adding names and complex initializers.

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 12:40:27 PST 2020


Author: Eric Astor
Date: 2020-02-24T15:40:04-05:00
New Revision: c2e272f8cf76ec97f675e0dfdada75445bbee5c5

URL: https://github.com/llvm/llvm-project/commit/c2e272f8cf76ec97f675e0dfdada75445bbee5c5
DIFF: https://github.com/llvm/llvm-project/commit/c2e272f8cf76ec97f675e0dfdada75445bbee5c5.diff

LOG: [ms] [llvm-ml] Improve data support, adding names and complex initializers.

Summary: Add support for ?, DUP, and string initializers, as well as MASM syntax for named data locations.

Reviewers: rnk, thakis

Reviewed By: thakis

Subscribers: merge_guards_bot, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73226

Added: 
    llvm/test/tools/llvm-ml/basic_data.test

Modified: 
    llvm/lib/MC/MCParser/MasmParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 99ede023f0bd..55a2d43dfe5a 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -482,12 +482,25 @@ class MasmParser : public MCAsmParser {
   /// Codeview def_range types parsed by this class.
   StringMap<CVDefRangeType> CVDefRangeTypeMap;
 
+  bool parseInitValue(unsigned Size); // NOTE(review): not defined in this patch
+
   // ".ascii", ".asciz", ".string"
   bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
-  bool parseDirectiveValue(StringRef IDVal,
-                           unsigned Size);       // "byte", "word", ...
-  bool parseDirectiveRealValue(StringRef IDVal,
-                               const fltSemantics &);  // "real4", ...
+
+  // "byte", "word", ...
+  bool parseScalarInstList(unsigned Size,
+                           SmallVectorImpl<const MCExpr *> &Values);
+  bool parseDirectiveValue(StringRef IDVal, unsigned Size);
+  bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name,
+                                SMLoc NameLoc);
+
+  // "real4", "real8"
+  bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics);
+  bool parseRealInstList(const fltSemantics &Semantics,
+                         SmallVectorImpl<APInt> &Values);
+  bool parseDirectiveNamedRealValue(StringRef IDVal,
+                                    const fltSemantics &Semantics,
+                                    StringRef Name, SMLoc NameLoc);
 
   // "=", "equ", "textequ"
   bool parseDirectiveEquate(StringRef IDVal, StringRef Name,
@@ -1903,6 +1916,33 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
   case DK_TEXTEQU:
     Lex();
     return parseDirectiveEquate(nextVal, IDVal, DirKind);
+  case DK_BYTE:
+  case DK_DB:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 1, IDVal, IDLoc);
+  case DK_WORD:
+  case DK_DW:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 2, IDVal, IDLoc);
+  case DK_DWORD:
+  case DK_DD:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 4, IDVal, IDLoc);
+  case DK_FWORD:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 6, IDVal, IDLoc);
+  case DK_QWORD:
+  case DK_DQ:
+    Lex();
+    return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc);
+  case DK_REAL4:
+    Lex();
+    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal,
+                                        IDLoc);
+  case DK_REAL8:
+    Lex();
+    return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal,
+                                        IDLoc);
   }
 
   // __asm _emit or __asm __emit
@@ -2739,31 +2779,99 @@ bool MasmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
   return false;
 }
 
+/// Parses a comma-separated list of scalar initializers, each \p Size bytes
+/// wide, appending one expression per element to \p Values. An item is a
+/// string, an expression, or "count DUP (list)". Returns true on error.
+bool MasmParser::parseScalarInstList(unsigned Size,
+                                     SmallVectorImpl<const MCExpr *> &Values) {
+  do {
+    if (getTok().is(AsmToken::String)) {
+      StringRef Value = getTok().getStringContents();
+      if (Size == 1) {
+        // Treat each character as an initializer.
+        for (const char CharVal : Value)
+          Values.push_back(MCConstantExpr::create(CharVal, getContext()));
+      } else {
+        // Treat the string as an initial value in big-endian representation.
+        if (Value.size() > Size)
+          return Error(getTok().getLoc(), "out of range literal value");
+        uint64_t IntValue = 0;
+        for (const unsigned char CharVal : Value.bytes())
+          IntValue = (IntValue << 8) | CharVal;
+        Values.push_back(MCConstantExpr::create(IntValue, getContext()));
+      }
+      Lex();
+    } else {
+      const MCExpr *Value;
+      if (checkForValidSection() || parseExpression(Value))
+        return true;
+      if (getTok().is(AsmToken::Identifier) &&
+          getTok().getString().equals_lower("dup")) {
+        Lex();  // eat 'dup'
+        const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+        if (!MCE)
+          return Error(Value->getLoc(),
+                       "cannot repeat value a non-constant number of times");
+        const int64_t Repetitions = MCE->getValue();
+        if (Repetitions < 0)
+          return Error(Value->getLoc(),
+                       "cannot repeat value a negative number of times");
+        SmallVector<const MCExpr *, 1> DuplicatedValues;
+        if (parseToken(AsmToken::LParen,
+                       "parentheses required for 'dup' contents") ||
+            parseScalarInstList(Size, DuplicatedValues) ||
+            parseToken(AsmToken::RParen, "unmatched parentheses"))
+          return true;
+        // The counter has Repetitions' type; an 'int' could overflow.
+        for (int64_t I = 0; I < Repetitions; ++I)
+          Values.append(DuplicatedValues.begin(), DuplicatedValues.end());
+      } else {
+        Values.push_back(Value);
+      }
+    }
+
+    // Continue if we see a comma. (Also, allow line continuation.)
+  } while (parseOptionalToken(AsmToken::Comma) &&
+           (getTok().isNot(AsmToken::EndOfStatement) ||
+            !parseToken(AsmToken::EndOfStatement)));
+  return false;
+}
+
 /// parseDirectiveValue
 ///  ::= (byte | word | ... ) [ expression (, expression)* ]
 bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
-  auto parseOp = [&]() -> bool {
-    const MCExpr *Value;
-    SMLoc ExprLoc = getLexer().getLoc();
-    if (checkForValidSection() || parseExpression(Value))
-      return true;
+  SmallVector<const MCExpr *, 1> Values;
+  if (parseScalarInstList(Size, Values))
+    return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+  // Emit the initializers in order, but only once the whole list has parsed.
+  for (const MCExpr *Value : Values) {
     // Special case constant expressions to match code generator.
     if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
       assert(Size <= 8 && "Invalid size");
       int64_t IntValue = MCE->getValue();
       if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
-        return Error(ExprLoc, "out of range literal value");
+        return Error(MCE->getLoc(), "out of range literal value");
       getStreamer().emitIntValue(IntValue, Size);
-    } else
-      getStreamer().emitValue(Value, Size, ExprLoc);
-    return false;
-  };
-
-  if (parseMany(parseOp))
-    return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+      continue;
+    }
+    const auto *MSE = dyn_cast<MCSymbolRefExpr>(Value);
+    if (MSE && MSE->getSymbol().getName() == "?")
+      getStreamer().emitIntValue(0, Size); // ? initializer; treat as 0.
+    else
+      getStreamer().emitValue(Value, Size, Value->getLoc());
+  }
   return false;
 }
 
+/// Defines label \p Name here, then parses the \p IDVal directive's operands:
+///  ::= name (byte | word | ... ) [ expression (, expression)* ]
+bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size,
+                                          StringRef Name, SMLoc NameLoc) {
+  // Pass NameLoc so label diagnostics can refer to the name's location.
+  getStreamer().emitLabel(getContext().getOrCreateSymbol(Name), NameLoc);
+  return parseDirectiveValue(IDVal, Size);
+}
+
 static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) {
   if (Asm.getTok().isNot(AsmToken::Integer) &&
       Asm.getTok().isNot(AsmToken::BigNum))
@@ -2824,24 +2932,75 @@ bool MasmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
   return false;
 }
 
+/// Parses a comma-separated list of real-number initializers, where an item
+/// is a value or "count DUP (list)", into \p ValuesAsInt. True on error.
+bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
+                                   SmallVectorImpl<APInt> &ValuesAsInt) {
+  do {
+    const AsmToken NextTok = Lexer.peekTok(); // lookahead past current token
+    if (NextTok.is(AsmToken::Identifier) &&
+        NextTok.getString().equals_lower("dup")) {
+      const MCExpr *Value;
+      if (parseExpression(Value) || parseToken(AsmToken::Identifier))
+        return true;
+      const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+      if (!MCE)
+        return Error(Value->getLoc(),
+                     "cannot repeat value a non-constant number of times");
+      const int64_t Repetitions = MCE->getValue();
+      if (Repetitions < 0)
+        return Error(Value->getLoc(),
+                     "cannot repeat value a negative number of times");
+      SmallVector<APInt, 1> DuplicatedValues;
+      if (parseToken(AsmToken::LParen,
+                     "parentheses required for 'dup' contents") ||
+          parseRealInstList(Semantics, DuplicatedValues) ||
+          parseToken(AsmToken::RParen, "unmatched parentheses"))
+        return true;
+      // The counter has Repetitions' type; an 'int' could overflow.
+      for (int64_t I = 0; I < Repetitions; ++I)
+        ValuesAsInt.append(DuplicatedValues.begin(), DuplicatedValues.end());
+    } else {
+      APInt AsInt;
+      if (parseRealValue(Semantics, AsInt))
+        return true;
+      ValuesAsInt.push_back(AsInt);
+    }
+    // Continue if we see a comma. (Also, allow line continuation.)
+  } while (parseOptionalToken(AsmToken::Comma) &&
+           (getTok().isNot(AsmToken::EndOfStatement) ||
+            !parseToken(AsmToken::EndOfStatement)));
+  return false;
+}
+
 /// parseDirectiveRealValue
 ///  ::= (real4 | real8) [ expression (, expression)* ]
 bool MasmParser::parseDirectiveRealValue(StringRef IDVal,
                                          const fltSemantics &Semantics) {
-  auto parseOp = [&]() -> bool {
-    APInt AsInt;
-    if (checkForValidSection() || parseRealValue(Semantics, AsInt))
-      return true;
-    getStreamer().emitIntValue(AsInt.getLimitedValue(),
-                               AsInt.getBitWidth() / 8);
-    return false;
-  };
+  if (checkForValidSection())
+    return true;
 
-  if (parseMany(parseOp))
+  SmallVector<APInt, 1> ParsedBits;
+  if (parseRealInstList(Semantics, ParsedBits))
     return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+
+  // Nothing is emitted unless the whole list parsed; each value is written
+  // as an integer whose byte width comes from its APInt.
+  for (const APInt &Bits : ParsedBits)
+    getStreamer().emitIntValue(Bits.getLimitedValue(), Bits.getBitWidth() / 8);
   return false;
 }
 
+/// Defines label \p Name here, then parses the \p IDVal directive's operands:
+///  ::= name (real4 | real8) [ expression (, expression)* ]
+bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal,
+                                              const fltSemantics &Semantics,
+                                              StringRef Name, SMLoc NameLoc) {
+  // Pass NameLoc so label diagnostics can refer to the name's location.
+  getStreamer().emitLabel(getContext().getOrCreateSymbol(Name), NameLoc);
+  return parseDirectiveRealValue(IDVal, Semantics);
+}
+
 /// parseDirectiveOrg
 ///  ::= .org expression [ , expression ]
 bool MasmParser::parseDirectiveOrg() {

diff  --git a/llvm/test/tools/llvm-ml/basic_data.test b/llvm/test/tools/llvm-ml/basic_data.test
new file mode 100644
index 000000000000..d39775fb6646
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/basic_data.test
@@ -0,0 +1,38 @@
+# RUN: llvm-ml -filetype=asm %s | FileCheck %s
+
+.data
+BYTE 2, 4, 6, 8
+; CHECK: .data
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .byte	8
+
+BYTE 2 dup (1, 2 dup (2)),
+     3
+; CHECK: .byte	1
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	3
+
+REAL4 1, 0
+; CHECK: .long 1065353216
+; CHECK-NEXT: .long 0
+
+REAL4 2 DUP (2.5, 2 dup (0)),
+      4
+; CHECK: .long 1075838976
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1075838976
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1082130432
+
+.code
+BYTE 5
+; CHECK: .text
+; CHECK-NEXT: .byte	5


        


More information about the llvm-commits mailing list