[llvm] 4cdea5f - [ms] [llvm-ml] Improve MASM STRUCT field accessor support

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 13 07:35:08 PDT 2020


Author: Eric Astor
Date: 2020-07-13T10:34:30-04:00
New Revision: 4cdea5faf980951bf3c4cb4ade9850d27c32af16

URL: https://github.com/llvm/llvm-project/commit/4cdea5faf980951bf3c4cb4ade9850d27c32af16
DIFF: https://github.com/llvm/llvm-project/commit/4cdea5faf980951bf3c4cb4ade9850d27c32af16.diff

LOG: [ms] [llvm-ml] Improve MASM STRUCT field accessor support

Summary:
Adds support for several accessors:
- `[<identifier>.<struct name>].<field>`
- `[<identifier>.<struct name>.<field>].<subfield>` (where `field` has an already-defined STRUCT type)
- `[<variable>.<field>].<subfield>` (where `field` has an already-defined STRUCT type)

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D83344

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCParser/MCAsmParser.h
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
    llvm/test/tools/llvm-ml/struct.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 204008975959..a68066e0f50b 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -170,8 +170,12 @@ class MCAsmParser {
 
   virtual bool isParsingMasm() const { return false; }
 
-  virtual bool LookUpFieldOffset(StringRef Base, StringRef Member,
-                                 unsigned &Offset) {
+  virtual bool lookUpField(StringRef Name, StringRef &Type,
+                           unsigned &Offset) const {
+    return true;
+  }
+  virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type,
+                           unsigned &Offset) const {
     return true;
   }
 

diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 3dbd00aae47a..d7d0508cabff 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -490,8 +490,10 @@ class MasmParser : public MCAsmParser {
 
   bool isParsingMasm() const override { return true; }
 
-  bool LookUpFieldOffset(StringRef Base, StringRef Member,
-                         unsigned &Offset) override;
+  bool lookUpField(StringRef Name, StringRef &Type,
+                   unsigned &Offset) const override;
+  bool lookUpField(StringRef Base, StringRef Member, StringRef &Type,
+                   unsigned &Offset) const override;
 
   bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
                         unsigned &NumOutputs, unsigned &NumInputs,
@@ -561,8 +563,8 @@ class MasmParser : public MCAsmParser {
   }
   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
 
-  bool LookUpFieldOffset(const StructInfo &Structure, StringRef Member,
-                         unsigned &Offset);
+  bool lookUpField(const StructInfo &Structure, StringRef Member,
+                   StringRef &Type, unsigned &Offset) const;
 
   /// Should we emit DWARF describing this assembler source?  (Returns false if
   /// the source has .file directives, which means we don't want to generate
@@ -1397,12 +1399,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     }
 
     // Find the field offset if used.
+    StringRef Type;
     unsigned Offset = 0;
     Split = SymbolName.split('.');
     if (!Split.second.empty()) {
       SymbolName = Split.first;
       if (Structs.count(SymbolName.lower()) &&
-          !LookUpFieldOffset(SymbolName, Split.second, Offset)) {
+          !lookUpField(SymbolName, Split.second, Type, Offset)) {
         // This is actually a reference to a field offset.
         Res = MCConstantExpr::create(Offset, getContext());
         return false;
@@ -1410,10 +1413,10 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
 
       auto TypeIt = KnownType.find(SymbolName);
       if (TypeIt == KnownType.end() ||
-          LookUpFieldOffset(*TypeIt->second, Split.second, Offset)) {
+          lookUpField(*TypeIt->second, Split.second, Type, Offset)) {
         std::pair<StringRef, StringRef> BaseMember = Split.second.split('.');
         StringRef Base = BaseMember.first, Member = BaseMember.second;
-        LookUpFieldOffset(Base, Member, Offset);
+        lookUpField(Base, Member, Type, Offset);
       }
     }
 
@@ -6519,34 +6522,56 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA,
   llvm_unreachable("Unstable rewrite sort.");
 }
 
-bool MasmParser::LookUpFieldOffset(StringRef Base, StringRef Member,
-                                   unsigned &Offset) {
+bool MasmParser::lookUpField(StringRef Name, StringRef &Type,
+                             unsigned &Offset) const {
+  const std::pair<StringRef, StringRef> BaseMember = Name.split('.');
+  const StringRef Base = BaseMember.first, Member = BaseMember.second;
+  return lookUpField(Base, Member, Type, Offset);
+}
+
+bool MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type,
+                             unsigned &Offset) const {
   if (Base.empty())
     return true;
 
+  unsigned BaseOffset = 0;
+  if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset))
+    Base = Type;
+
   auto TypeIt = KnownType.find(Base);
   if (TypeIt != KnownType.end())
-    return LookUpFieldOffset(*TypeIt->second, Member, Offset);
+    return lookUpField(*TypeIt->second, Member, Type, Offset);
 
   auto StructIt = Structs.find(Base.lower());
   if (StructIt != Structs.end())
-    return LookUpFieldOffset(StructIt->second, Member, Offset);
+    return lookUpField(StructIt->second, Member, Type, Offset);
 
   return true;
 }
 
-bool MasmParser::LookUpFieldOffset(const StructInfo &Structure,
-                                   StringRef Member, unsigned &Offset) {
+bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
+                             StringRef &Type, unsigned &Offset) const {
+  if (Member.empty()) {
+    Type = Structure.Name;
+    return false;
+  }
+
   std::pair<StringRef, StringRef> Split = Member.split('.');
   const StringRef FieldName = Split.first, FieldMember = Split.second;
 
+  auto StructIt = Structs.find(FieldName.lower());
+  if (StructIt != Structs.end())
+    return lookUpField(StructIt->second, FieldMember, Type, Offset);
+
   auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
   if (FieldIt == Structure.FieldsByName.end())
     return true;
 
   const FieldInfo &Field = Structure.Fields[FieldIt->second];
   if (FieldMember.empty()) {
-    Offset = Field.Offset;
+    Offset += Field.Offset;
+    if (Field.Contents.FT == FT_STRUCT)
+      Type = Field.Contents.StructInfo.Structure.Name;
     return false;
   }
 
@@ -6554,7 +6579,7 @@ bool MasmParser::LookUpFieldOffset(const StructInfo &Structure,
     return true;
   const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
 
-  bool Result = LookUpFieldOffset(StructInfo.Structure, FieldMember, Offset);
+  bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset);
   if (Result)
     return true;
 

diff  --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 0573d4eec059..fe09b2952f0e 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -358,6 +358,7 @@ class X86AsmParser : public MCTargetAsmParser {
     bool MemExpr;
     bool OffsetOperator;
     SMLoc OffsetOperatorLoc;
+    StringRef CurType;
 
     bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
       if (Sym) {
@@ -385,6 +386,7 @@ class X86AsmParser : public MCTargetAsmParser {
     unsigned getScale() { return Scale; }
     const MCExpr *getSym() { return Sym; }
     StringRef getSymName() { return SymName; }
+    StringRef getType() { return CurType; }
     int64_t getImm() { return Imm + IC.execute(); }
     bool isValidEndState() {
       return State == IES_RBRAC || State == IES_INTEGER;
@@ -846,6 +848,7 @@ class X86AsmParser : public MCTargetAsmParser {
       }
       return false;
     }
+    void setType(StringRef Type) { CurType = Type; }
   };
 
   bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
@@ -1641,27 +1644,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
           break;
         }
         if (Parser.isParsingMasm()) {
-          const std::pair<StringRef, StringRef> RegField =
+          const std::pair<StringRef, StringRef> IDField =
               Tok.getString().split('.');
-          const StringRef RegName = RegField.first, Field = RegField.second;
-          SMLoc RegEndLoc =
-              SMLoc::getFromPointer(RegName.data() + RegName.size());
+          const StringRef ID = IDField.first, Field = IDField.second;
+          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
           if (!Field.empty() &&
-              !MatchRegisterByName(Reg, RegName, IdentLoc, RegEndLoc)) {
+              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
             if (SM.onRegister(Reg, ErrMsg))
               return Error(IdentLoc, ErrMsg);
 
+            StringRef Type;
+            unsigned Offset = 0;
             SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
-            const std::pair<StringRef, StringRef> BaseMember = Field.split('.');
-            const StringRef Base = BaseMember.first, Member = BaseMember.second;
-
-            unsigned Offset;
-            if (Parser.LookUpFieldOffset(Base, Member, Offset))
+            if (Parser.lookUpField(Field, Type, Offset))
               return Error(FieldStartLoc, "unknown offset");
             else if (SM.onPlus(ErrMsg))
               return Error(getTok().getLoc(), ErrMsg);
             else if (SM.onInteger(Offset, ErrMsg))
               return Error(IdentLoc, ErrMsg);
+            SM.setType(Type);
 
             End = consumeToken();
             break;
@@ -1915,9 +1916,11 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
 }
 
 /// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
+bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
+                                         SMLoc &End) {
   const AsmToken &Tok = getTok();
-  unsigned Offset;
+  StringRef Type;
+  unsigned Offset = 0;
 
   // Drop the optional '.'.
   StringRef DotDispStr = Tok.getString();
@@ -1933,8 +1936,9 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
              Tok.is(AsmToken::Identifier)) {
     const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
     const StringRef Base = BaseMember.first, Member = BaseMember.second;
-    if (getParser().LookUpFieldOffset(SM.getSymName(), DotDispStr, Offset) &&
-        getParser().LookUpFieldOffset(Base, Member, Offset) &&
+    if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
+        getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
+        getParser().lookUpField(DotDispStr, Type, Offset) &&
         (!SemaCallback ||
          SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
       return Error(Tok.getLoc(), "Unable to lookup field reference!");
@@ -1947,6 +1951,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
   while (Tok.getLoc().getPointer() < DotExprEndLoc)
     Lex();
   SM.addImm(Offset);
+  SM.setType(Type);
   return false;
 }
 

diff  --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index 0e60d2449455..ecd89a140371 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -85,13 +85,11 @@ t3:
 mov eax, t2.f.h
 mov eax, [t2].f.h
 mov eax, [t2.f.h]
-mov eax, t2.FOOBAR.f.h
 
 ; CHECK: t3:
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
-; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
 
 t4:
 mov eax, j.FOOBAR.f.h
@@ -101,4 +99,52 @@ mov eax, j.baz.b
 ; CHECK-NEXT: mov eax, dword ptr [rip + j+12]
 ; CHECK-NEXT: mov eax, dword ptr [rip + j+1]
 
+t5:
+mov eax, [ebx].FOOBAR.f.h
+mov eax, [ebx.FOOBAR].f.h
+mov eax, [ebx.FOOBAR.f.h]
+
+; CHECK: t5:
+; CHECK-NEXT: mov eax, dword ptr [ebx + 12]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 12]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 12]
+
+t6:
+mov eax, t2.FOOBAR.f.h
+mov eax, [t2].FOOBAR.f.h
+mov eax, [t2.FOOBAR].f.h
+mov eax, [t2.FOOBAR.f.h]
+
+; CHECK: t6:
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+
+t7:
+mov eax, [ebx].FOOBAR.e.b
+mov eax, [ebx.FOOBAR].e.b
+mov eax, [ebx.FOOBAR.e].b
+mov eax, [ebx.FOOBAR.e.b]
+
+; CHECK: t7:
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+
+t8:
+mov eax, t2.FOOBAR.e.b
+mov eax, [t2].FOOBAR.e.b
+mov eax, [t2.FOOBAR].e.b
+mov eax, [t2.FOOBAR.e].b
+mov eax, [t2.FOOBAR.e.b]
+
+; CHECK: t8:
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+; CHECK-NEXT: mov eax, dword ptr [rip + (t2+8)+1]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+
 END


        


More information about the llvm-commits mailing list