[llvm] 5afb360 - [ms] [llvm-ml] Allow arbitrary strings as integer constants

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 6 14:15:57 PST 2020


Author: Eric Astor
Date: 2020-11-06T17:15:49-05:00
New Revision: 5afb36080809567edf5a7936821dcac5a064527b

URL: https://github.com/llvm/llvm-project/commit/5afb36080809567edf5a7936821dcac5a064527b
DIFF: https://github.com/llvm/llvm-project/commit/5afb36080809567edf5a7936821dcac5a064527b.diff

LOG: [ms] [llvm-ml] Allow arbitrary strings as integer constants

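MASM interprets a string in an expression context as an integer written in big-endian base-256, with each character contributing its ASCII value as one digit (for example, 'ab' is 0x6162 = 24930). Strings longer than 8 characters do not fit in 64 bits and are rejected as out of range.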

Because this general rule also covers one-character strings, it completely eliminates the need to special-case single-character strings.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D90788

Added: 
    llvm/test/tools/llvm-ml/strings_errors.test

Modified: 
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
    llvm/test/tools/llvm-ml/strings.test

Removed: 
    


################################################################################
diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 845a3cf13d2b..6df53fc511f4 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1332,6 +1332,8 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
 ///  primaryexpr ::= number
 ///  primaryexpr ::= '.'
 ///  primaryexpr ::= ~,+,-,'not' primaryexpr
+///  primaryexpr ::= string
+///          (a string is interpreted as a 64-bit number in big-endian base-256)
 bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
                                   AsmTypeInfo *TypeInfo) {
   SMLoc FirstTokenLoc = getLexer().getLoc();
@@ -1350,7 +1352,6 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
     return false;
   case AsmToken::Dollar:
   case AsmToken::At:
-  case AsmToken::String:
   case AsmToken::Identifier: {
     StringRef Identifier;
     if (parseIdentifier(Identifier)) {
@@ -1517,6 +1518,20 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
     }
     return false;
   }
+  case AsmToken::String: {
+    // MASM strings (used as constants) are interpreted as big-endian base-256.
+    SMLoc ValueLoc = getTok().getLoc();
+    std::string Value;
+    if (parseEscapedString(Value))
+      return true;
+    if (Value.size() > 8)
+      return Error(ValueLoc, "literal value out of range");
+    uint64_t IntValue = 0;
+    for (const unsigned char CharVal : Value)
+      IntValue = (IntValue << 8) | CharVal;
+    Res = MCConstantExpr::create(IntValue, getContext());
+    return false;
+  }
   case AsmToken::Real: {
     APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
     uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
@@ -3168,28 +3183,17 @@ bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) {
 bool MasmParser::parseScalarInitializer(unsigned Size,
                                         SmallVectorImpl<const MCExpr *> &Values,
                                         unsigned StringPadLength) {
-  if (getTok().is(AsmToken::String)) {
+  if (Size == 1 && getTok().is(AsmToken::String)) {
     std::string Value;
     if (parseEscapedString(Value))
       return true;
-    if (Size == 1) {
-      // Treat each character as an initializer.
-      for (const char CharVal : Value)
-        Values.push_back(MCConstantExpr::create(CharVal, getContext()));
-
-      // Pad the string with spaces to the specified length.
-      for (size_t i = Value.size(); i < StringPadLength; ++i)
-        Values.push_back(MCConstantExpr::create(' ', getContext()));
-    } else {
-      // Treat the string as an initial value in big-endian representation.
-      if (Value.size() > Size)
-        return Error(getTok().getLoc(), "out of range literal value");
-
-      uint64_t IntValue = 0;
-      for (const unsigned char CharVal : Value)
-        IntValue = (IntValue << 8) | CharVal;
-      Values.push_back(MCConstantExpr::create(IntValue, getContext()));
-    }
+    // Treat each character as an initializer.
+    for (const unsigned char CharVal : Value)
+      Values.push_back(MCConstantExpr::create(CharVal, getContext()));
+
+    // Pad the string with spaces to the specified length.
+    for (size_t i = Value.size(); i < StringPadLength; ++i)
+      Values.push_back(MCConstantExpr::create(' ', getContext()));
   } else {
     const MCExpr *Value;
     if (parseExpression(Value))

diff  --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6d037ca14523..fd7beea803fd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1693,20 +1693,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
         return Error(Tok.getLoc(), "unknown token in expression");
       }
       LLVM_FALLTHROUGH;
+    case AsmToken::String: {
+      if (Parser.isParsingMasm()) {
+        // MASM parsers handle strings in expressions as constants.
+        SMLoc ValueLoc = Tok.getLoc();
+        int64_t Res;
+        const MCExpr *Val;
+        if (Parser.parsePrimaryExpr(Val, End, nullptr))
+          return true;
+        UpdateLocLex = false;
+        if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
+          return Error(ValueLoc, "expected absolute value");
+        if (SM.onInteger(Res, ErrMsg))
+          return Error(ValueLoc, ErrMsg);
+        break;
+      }
+      LLVM_FALLTHROUGH;
+    }
     case AsmToken::At:
-    case AsmToken::String:
     case AsmToken::Identifier: {
-      if (Parser.isParsingMasm() && Tok.is(AsmToken::String)) {
-        // Single-character strings should be treated as integer constants. This
-        // includes MASM escapes for quotes.
-        char Quote = Tok.getString().front();
-        StringRef Contents = Tok.getStringContents();
-        if (Contents.size() == 1 || Contents == std::string(2, Quote)) {
-          if (SM.onInteger(Contents.front(), ErrMsg))
-            return Error(Tok.getLoc(), ErrMsg);
-          break;
-        }
-      }
       SMLoc IdentLoc = Tok.getLoc();
       StringRef Identifier = Tok.getString();
       UpdateLocLex = false;

diff  --git a/llvm/test/tools/llvm-ml/strings.test b/llvm/test/tools/llvm-ml/strings.test
index 5064a458b7ec..c43f39d42c3f 100644
--- a/llvm/test/tools/llvm-ml/strings.test
+++ b/llvm/test/tools/llvm-ml/strings.test
@@ -119,4 +119,25 @@ dq_char_test PROC
   ret
 dq_char_test ENDP
 
+string_constant_test PROC
+; CHECK-LABEL: string_constant_test:
+
+  mov eax, 'ab'
+  mov eax, "ab"
+; CHECK: mov eax, 24930
+; CHECK: mov eax, 24930
+
+  mov eax, "abc"
+  mov eax, 'abc'
+; CHECK: mov eax, 6382179
+; CHECK: mov eax, 6382179
+
+  mov eax, "abc"""
+  mov eax, 'abc'''
+; CHECK: mov eax, 1633837858
+; CHECK: mov eax, 1633837863
+
+  ret
+string_constant_test ENDP
+
 end

diff  --git a/llvm/test/tools/llvm-ml/strings_errors.test b/llvm/test/tools/llvm-ml/strings_errors.test
new file mode 100644
index 000000000000..377ad29c06ff
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/strings_errors.test
@@ -0,0 +1,15 @@
+; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s --implicit-check-not=error:
+
+.code
+
+oversize_string_test PROC
+
+  mov rax, "abcdefghi"
+  mov rax, 'abcdefghi'
+; CHECK: error: literal value out of range
+; CHECK: error: literal value out of range
+
+  ret
+oversize_string_test ENDP
+
+end


        


More information about the llvm-commits mailing list