[llvm] r314493 - [X86][MS-InlineAsm] Extended support for variables / identifiers on memory / immediate expressions

Coby Tayree via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 29 00:02:46 PDT 2017


Author: coby
Date: Fri Sep 29 00:02:46 2017
New Revision: 314493

URL: http://llvm.org/viewvc/llvm-project?rev=314493&view=rev
Log:
[X86][MS-InlineAsm] Extended support for variables / identifiers on memory / immediate expressions

Allow the proper recognition of enum values and global variables inside MS inline-asm memory / immediate expressions, as they require some additional overhead and are treated incorrectly if they are not recognized early.
Supersedes D33278 and D35774.

Differential Revision: https://reviews.llvm.org/D37412

Modified:
    llvm/trunk/include/llvm/MC/MCParser/MCAsmParser.h
    llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp

Modified: llvm/trunk/include/llvm/MC/MCParser/MCAsmParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/MCAsmParser.h?rev=314493&r1=314492&r2=314493&view=diff
==============================================================================
--- llvm/trunk/include/llvm/MC/MCParser/MCAsmParser.h (original)
+++ llvm/trunk/include/llvm/MC/MCParser/MCAsmParser.h Fri Sep 29 00:02:46 2017
@@ -34,19 +34,60 @@ class MCStreamer;
 class MCTargetAsmParser;
 class SourceMgr;
 
-class InlineAsmIdentifierInfo {
-public:
-  void *OpDecl;
-  bool IsVarDecl;
-  unsigned Length, Size, Type;
-
-  void clear() {
-    OpDecl = nullptr;
-    IsVarDecl = false;
-    Length = 1;
-    Size = 0;
-    Type = 0;
+struct InlineAsmIdentifierInfo {
+  enum IdKind {
+    IK_Invalid,  // Initial state. Unexpected after a successful parsing.
+    IK_Label,    // Function/Label reference.
+    IK_EnumVal,  // Value of enumration type.
+    IK_Var       // Variable.
+  };
+  // Represents an Enum value
+  struct EnumIdentifier {
+    int64_t EnumVal;
+  };
+  // Represents a label/function reference
+  struct LabelIdentifier {
+    void *Decl;
+  };
+  // Represents a variable
+  struct VariableIdentifier {
+    void *Decl;
+    bool IsGlobalLV;
+    unsigned Length;
+    unsigned Size;
+    unsigned Type;
+  };
+  // An InlineAsm identifier can only be one of those
+  union {
+    EnumIdentifier Enum;
+    LabelIdentifier Label;
+    VariableIdentifier Var;
+  };
+  bool isKind(IdKind kind) const { return Kind == kind; }
+  // Initializers
+  void setEnum(int64_t enumVal) {
+    assert(isKind(IK_Invalid) && "should be initialized only once");
+    Kind = IK_EnumVal;
+    Enum.EnumVal = enumVal;
+  }
+  void setLabel(void *decl) {
+    assert(isKind(IK_Invalid) && "should be initialized only once");
+    Kind = IK_Label;
+    Label.Decl = decl;
+  }
+  void setVar(void *decl, bool isGlobalLV, unsigned size, unsigned type) {
+    assert(isKind(IK_Invalid) && "should be initialized only once");
+    Kind = IK_Var;
+    Var.Decl = decl;
+    Var.IsGlobalLV = isGlobalLV;
+    Var.Size = size;
+    Var.Type = type;
+    Var.Length = size / type;
   }
+  InlineAsmIdentifierInfo() : Kind(IK_Invalid) {}
+private:
+  // Discrimint using the current kind
+  IdKind Kind;
 };
 
 /// \brief Generic Sema callback for assembly parser.
@@ -54,9 +95,9 @@ class MCAsmParserSemaCallback {
 public:
   virtual ~MCAsmParserSemaCallback();
 
-  virtual void *LookupInlineAsmIdentifier(StringRef &LineBuf,
-                                          InlineAsmIdentifierInfo &Info,
-                                          bool IsUnevaluatedContext) = 0;
+  virtual void LookupInlineAsmIdentifier(StringRef &LineBuf,
+                                         InlineAsmIdentifierInfo &Info,
+                                         bool IsUnevaluatedContext) = 0;
   virtual StringRef LookupInlineAsmLabel(StringRef Identifier, SourceMgr &SM,
                                          SMLoc Location, bool Create) = 0;
   virtual bool LookupInlineAsmField(StringRef Base, StringRef Member,

Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=314493&r1=314492&r2=314493&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original)
+++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Fri Sep 29 00:02:46 2017
@@ -339,9 +339,7 @@ private:
     IntelExprStateMachine()
         : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
           TmpReg(0), Scale(1), Imm(0), Sym(nullptr), BracCount(0),
-          MemExpr(false) {
-      Info.clear();
-    }
+          MemExpr(false) {}
 
     void addImm(int64_t imm) { Imm += imm; }
     short getBracCount() { return BracCount; }
@@ -580,7 +578,15 @@ private:
       return false;
     }
     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
-                          StringRef &ErrMsg) {
+                          const InlineAsmIdentifierInfo &IDInfo,
+                          bool ParsingInlineAsm, StringRef &ErrMsg) {
+      // InlineAsm: Treat an enum value as an integer
+      if (ParsingInlineAsm)
+        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
+          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
+      // Treat a symbolic constant like an integer
+      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
+        return onInteger(CE->getValue(), ErrMsg);
       PrevState = State;
       bool HasSymbol = Sym != nullptr;
       switch (State) {
@@ -592,11 +598,13 @@ private:
       case IES_NOT:
       case IES_INIT:
       case IES_LBRAC:
-        MemExpr = !(SymRef->getKind() == MCExpr::Constant);
+        MemExpr = true;
         State = IES_INTEGER;
         Sym = SymRef;
         SymName = SymRefName;
         IC.pushOperand(IC_IMM);
+        if (ParsingInlineAsm)
+          Info = IDInfo;
         break;
       }
       if (HasSymbol)
@@ -1261,38 +1269,43 @@ std::unique_ptr<X86Operand> X86AsmParser
     const InlineAsmIdentifierInfo &Info) {
   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
   // some other label reference.
-  if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
     // Insert an explicit size if the user didn't have one.
     if (!Size) {
       Size = getPointerWidth();
       InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
                                           /*Len=*/0, Size);
     }
-
     // Create an absolute memory reference in order to match against
     // instructions taking a PC relative operand.
     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
-                                 Identifier, Info.OpDecl);
+                                 Identifier, Info.Label.Decl);
   }
-
-
   // We either have a direct symbol reference, or an offset from a symbol.  The
   // parser always puts the symbol on the LHS, so look there for size
   // calculation purposes.
   unsigned FrontendSize = 0;
-  const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
-  bool IsSymRef =
-      isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
-  if (IsSymRef && !Size && Info.Type)
-    FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
-
-  // When parsing inline assembly we set the base register to a non-zero value
+  void *Decl = nullptr;
+  bool IsGlobalLV = false;
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
+    // Size is in terms of bits in this context.
+    FrontendSize = Info.Var.Type * 8;
+    Decl = Info.Var.Decl;
+    IsGlobalLV = Info.Var.IsGlobalLV;
+  }
+  // It is widely common for MS InlineAsm to use a global variable and one/two
+  // registers in a mmory expression, and though unaccessible via rip/eip.
+  if (IsGlobalLV && (BaseReg || IndexReg)) {
+    return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
+  // Otherwise, we set the base register to a non-zero value
   // if we don't know the actual value at this time.  This is necessary to
   // get the matching correct in some cases.
-  BaseReg = BaseReg ? BaseReg : 1;
-  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
-                               IndexReg, Scale, Start, End, Size, Identifier,
-                               Info.OpDecl, FrontendSize);
+  } else {
+    BaseReg = BaseReg ? BaseReg : 1;
+    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
+                                 IndexReg, Scale, Start, End, Size, Identifier,
+                                 Decl, FrontendSize);
+  }
 }
 
 // Some binary bitwise operators have a named synonymous
@@ -1348,44 +1361,53 @@ bool X86AsmParser::ParseIntelExpression(
       break;
     case AsmToken::String:
     case AsmToken::Identifier: {
-      // This could be a register or a symbolic displacement.
-      unsigned TmpReg;
-      const MCExpr *Val;
       SMLoc IdentLoc = Tok.getLoc();
       StringRef Identifier = Tok.getString();
       UpdateLocLex = false;
-      if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
-        if (SM.onRegister(TmpReg, ErrMsg))
+      // Register
+      unsigned Reg;
+      if (Tok.isNot(AsmToken::String) && !ParseRegister(Reg, IdentLoc, End)) {
+        if (SM.onRegister(Reg, ErrMsg))
           return Error(Tok.getLoc(), ErrMsg);
-      } else if (ParseIntelNamedOperator(Identifier, SM)) {
-        UpdateLocLex = true;
-      } else if (!isParsingInlineAsm()) {
-        if (getParser().parsePrimaryExpr(Val, End))
+        break;
+      }
+      // Operator synonymous ("not", "or" etc.)
+      if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
+        break;
+      // Symbol reference, when parsing assembly content
+      InlineAsmIdentifierInfo Info;
+      const MCExpr *Val;
+      if (!isParsingInlineAsm()) {
+        if (getParser().parsePrimaryExpr(Val, End)) {
           return Error(Tok.getLoc(), "Unexpected identifier!");
-        if (auto *CE = dyn_cast<MCConstantExpr>(Val)) {
-          if (SM.onInteger(CE->getValue(), ErrMsg))
-            return Error(IdentLoc, ErrMsg);
-        } else if (SM.onIdentifierExpr(Val, Identifier, ErrMsg))
+        } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
           return Error(IdentLoc, ErrMsg);
-      } else if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
+        } else
+          break;
+      }
+      // MS InlineAsm operators (TYPE/LENGTH/SIZE)
+      if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
         if (OpKind == IOK_OFFSET)
           return Error(IdentLoc, "Dealing OFFSET operator as part of"
             "a compound immediate expression is yet to be supported");
-        int64_t Val = ParseIntelInlineAsmOperator(OpKind);
-        if (!Val)
+        if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
+          if (SM.onInteger(Val, ErrMsg))
+            return Error(IdentLoc, ErrMsg);
+        } else
           return true;
-        if (SM.onInteger(Val, ErrMsg))
-          return Error(IdentLoc, ErrMsg);
-      } else if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
-          if (ParseIntelDotOperator(SM, End))
-            return true;
-      } else if (ParseIntelInlineAsmIdentifier(Val, Identifier,
-                                               SM.getIdentifierInfo(),
-                                               /*Unevaluated=*/false, End)) {
+        break;
+      }
+      // MS Dot Operator expression
+      if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
+        if (ParseIntelDotOperator(SM, End))
+          return true;
+        break;
+      }
+      // MS InlineAsm identifier
+      if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
         return true;
-      } else if (SM.onIdentifierExpr(Val, Identifier, ErrMsg)) {
+      else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
         return Error(IdentLoc, ErrMsg);
-      }
       break;
     }
     case AsmToken::Integer: {
@@ -1405,7 +1427,9 @@ bool X86AsmParser::ParseIntelExpression(
           if (IDVal == "b" && Sym->isUndefined())
             return Error(Loc, "invalid reference to undefined symbol");
           StringRef Identifier = Sym->getName();
-          if (SM.onIdentifierExpr(Val, Identifier, ErrMsg))
+          InlineAsmIdentifierInfo Info;
+          if (SM.onIdentifierExpr(Val, Identifier, Info,
+              isParsingInlineAsm(), ErrMsg))
             return Error(Loc, ErrMsg);
           End = consumeToken();
         } else {
@@ -1500,8 +1524,7 @@ bool X86AsmParser::ParseIntelInlineAsmId
   Val = nullptr;
 
   StringRef LineBuf(Identifier.data());
-  void *Result =
-    SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
+  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
 
   const AsmToken &Tok = Parser.getTok();
   SMLoc Loc = Tok.getLoc();
@@ -1517,12 +1540,13 @@ bool X86AsmParser::ParseIntelInlineAsmId
 
   // The frontend should end parsing on an assembler token boundary, unless it
   // failed parsing.
-  assert((End.getPointer() == EndPtr || !Result) &&
-         "frontend claimed part of a token?");
+  assert((End.getPointer() == EndPtr ||
+          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
+          "frontend claimed part of a token?");
 
   // If the identifier lookup was unsuccessful, assume that we are dealing with
   // a label.
-  if (!Result) {
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
     StringRef InternalName =
       SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                          Loc, false);
@@ -1530,8 +1554,8 @@ bool X86AsmParser::ParseIntelInlineAsmId
     // Push a rewrite for replacing the identifier name with the internal name.
     InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                         InternalName);
-  }
-
+  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
+    return false;
   // Create the symbol reference.
   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
@@ -1625,6 +1649,12 @@ std::unique_ptr<X86Operand> X86AsmParser
                                     /*Unevaluated=*/false, End))
     return nullptr;
 
+  void *Decl = nullptr;
+  // FIXME: MS evaluates "offset <Constant>" to the underlying integral
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
+    return ErrorOperand(Start, "offset operator cannot yet handle constants");
+  else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
+    Decl = Info.Var.Decl;
   // Don't emit the offset operator.
   InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
 
@@ -1635,7 +1665,7 @@ std::unique_ptr<X86Operand> X86AsmParser
   unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
 
   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
-                               OffsetOfLoc, Identifier, Info.OpDecl);
+                               OffsetOfLoc, Identifier, Decl);
 }
 
 // Query a candidate string for being an Intel assembly operator
@@ -1668,7 +1698,7 @@ unsigned X86AsmParser::ParseIntelInlineA
                                     /*Unevaluated=*/true, End))
     return 0;
 
-  if (!Info.OpDecl) {
+  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
     Error(Start, "unable to lookup expression");
     return 0;
   }
@@ -1676,9 +1706,9 @@ unsigned X86AsmParser::ParseIntelInlineA
   unsigned CVal = 0;
   switch(OpKind) {
   default: llvm_unreachable("Unexpected operand kind!");
-  case IOK_LENGTH: CVal = Info.Length; break;
-  case IOK_SIZE: CVal = Info.Size; break;
-  case IOK_TYPE: CVal = Info.Type; break;
+  case IOK_LENGTH: CVal = Info.Var.Length; break;
+  case IOK_SIZE: CVal = Info.Var.Size; break;
+  case IOK_TYPE: CVal = Info.Var.Type; break;
   }
 
   return CVal;




More information about the llvm-commits mailing list