[llvm] [ms] [llvm-ml] Allow PTR casting of registers to their own size (PR #132751)

Eric Astor via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 14:32:54 PDT 2025


https://github.com/ericastor updated https://github.com/llvm/llvm-project/pull/132751

>From 0d0c008ddc48ec05e3da1e037ba240343658e9fc Mon Sep 17 00:00:00 2001
From: Eric Astor <epastor at google.com>
Date: Mon, 24 Mar 2025 14:50:14 +0000
Subject: [PATCH 1/4] [ms] [llvm-ml] Allow PTR casting of registers to their
 own size

MASM allows no-op casts on register operands; for example, `DWORD PTR eax` is a legal expression. Any other cast is an error.

Fixes #132084
---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 56 +++++++++++++++++--
 llvm/test/tools/llvm-ml/cast.asm              | 25 +++++++++
 llvm/test/tools/llvm-ml/cast_errors.asm       | 41 ++++++++++++++
 3 files changed, 116 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/tools/llvm-ml/cast.asm
 create mode 100644 llvm/test/tools/llvm-ml/cast_errors.asm

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a6285a55f4155..475b0016ace68 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -14,6 +14,9 @@
 #include "MCTargetDesc/X86TargetStreamer.h"
 #include "TargetInfo/X86TargetInfo.h"
 #include "X86Operand.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
+#include "third_party/llvm/llvm-project/llvm/include/llvm/MC/MCRegister.h"
+#include "third_party/llvm/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -38,6 +41,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cstdint>
 #include <memory>
 
 using namespace llvm;
@@ -1150,7 +1154,7 @@ class X86AsmParser : public MCTargetAsmParser {
 
   X86::CondCode ParseConditionCode(StringRef CCode);
 
-  bool ParseIntelMemoryOperandSize(unsigned &Size);
+  bool ParseIntelMemoryOperandSize(unsigned &Size, StringRef *SizeStr);
   bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
                                MCRegister BaseReg, MCRegister IndexReg,
                                unsigned Scale, bool NonAbsMem, SMLoc Start,
@@ -2551,7 +2555,8 @@ bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
   return false;
 }
 
-bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
+bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
+                                               StringRef *SizeStr) {
   Size = StringSwitch<unsigned>(getTok().getString())
     .Cases("BYTE", "byte", 8)
     .Cases("WORD", "word", 16)
@@ -2569,6 +2574,8 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
     .Cases("ZMMWORD", "zmmword", 512)
     .Default(0);
   if (Size) {
+    if (SizeStr)
+      *SizeStr = getTok().getString();
     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
     if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
@@ -2577,6 +2584,31 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
   return false;
 }
 
+uint16_t RegSizeInBits(MCRegister RegNo, const MCRegisterInfo &MRI) {
+  uint16_t Size = 0;
+  if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo))
+    Size = 8;
+  else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo))
+    Size = 16;
+  else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo))
+    Size = 32;
+  else if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
+    Size = 64;
+  else if (X86MCRegisterClasses[X86::RFP80RegClassID].contains(RegNo))
+    Size = 80;
+  else if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo))
+    Size = 128;
+  else if (X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo))
+    Size = 128;
+  else if (X86MCRegisterClasses[X86::VR256XRegClassID].contains(RegNo))
+    Size = 256;
+  else if (X86MCRegisterClasses[X86::VR512RegClassID].contains(RegNo))
+    Size = 512;
+  else
+    llvm_unreachable("Register without known register class");
+  return Size;
+}
+
 bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
   MCAsmParser &Parser = getParser();
   const AsmToken &Tok = Parser.getTok();
@@ -2584,7 +2616,8 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
 
   // Parse optional Size directive.
   unsigned Size;
-  if (ParseIntelMemoryOperandSize(Size))
+  StringRef SizeStr;
+  if (ParseIntelMemoryOperandSize(Size, &SizeStr))
     return true;
   bool PtrInOperand = bool(Size);
 
@@ -2601,9 +2634,20 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
       return Error(Start, "rip can only be used as a base register");
     // A Register followed by ':' is considered a segment override
     if (Tok.isNot(AsmToken::Colon)) {
-      if (PtrInOperand)
-        return Error(Start, "expected memory operand after 'ptr', "
-                            "found register operand instead");
+      if (PtrInOperand) {
+        if (!Parser.isParsingMasm())
+          return Error(Start, "expected memory operand after 'ptr', "
+                              "found register operand instead");
+
+        // If we are parsing MASM, we are allowed to cast registers to their own
+        // sizes, but not to other types.
+        if (RegSizeInBits(RegNo, *getContext().getRegisterInfo()) != Size)
+          return Error(
+              Start,
+              "cannot cast register '" +
+                  StringRef(getContext().getRegisterInfo()->getName(RegNo)) +
+                  "' to '" + SizeStr + "'; size does not match");
+      }
       Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
       return false;
     }
diff --git a/llvm/test/tools/llvm-ml/cast.asm b/llvm/test/tools/llvm-ml/cast.asm
new file mode 100644
index 0000000000000..2b4aaae88866e
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/cast.asm
@@ -0,0 +1,25 @@
+; RUN: llvm-ml -m64 -filetype=s %s /Fo - | FileCheck %s
+
+.code
+
+mov byte ptr al, al
+mov al, byte ptr al
+; CHECK: mov al, al
+; CHECK-NEXT: mov al, al
+
+mov word ptr ax, ax
+mov ax, word ptr ax
+; CHECK: mov ax, ax
+; CHECK-NEXT: mov ax, ax
+
+mov dword ptr eax, eax
+mov eax, dword ptr eax
+; CHECK: mov eax, eax
+; CHECK-NEXT: mov eax, eax
+
+mov qword ptr rax, rax
+mov rax, qword ptr rax
+; CHECK: mov rax, rax
+; CHECK-NEXT: mov rax, rax
+
+END
diff --git a/llvm/test/tools/llvm-ml/cast_errors.asm b/llvm/test/tools/llvm-ml/cast_errors.asm
new file mode 100644
index 0000000000000..60cd9a4454ed8
--- /dev/null
+++ b/llvm/test/tools/llvm-ml/cast_errors.asm
@@ -0,0 +1,41 @@
+; RUN: not llvm-ml -m64 -filetype=s %s /Fo /dev/null 2>&1 | FileCheck %s
+
+.code
+
+mov word ptr al, ax
+; CHECK: error: cannot cast register 'AL' to 'word'; size does not match
+
+mov dword ptr al, eax
+; CHECK: error: cannot cast register 'AL' to 'dword'; size does not match
+
+mov qword ptr al, rax
+; CHECK: error: cannot cast register 'AL' to 'qword'; size does not match
+
+mov byte ptr ax, al
+; CHECK: error: cannot cast register 'AX' to 'byte'; size does not match
+
+mov dword ptr ax, eax
+; CHECK: error: cannot cast register 'AX' to 'dword'; size does not match
+
+mov qword ptr ax, rax
+; CHECK: error: cannot cast register 'AX' to 'qword'; size does not match
+
+mov byte ptr eax, al
+; CHECK: error: cannot cast register 'EAX' to 'byte'; size does not match
+
+mov word ptr eax, ax
+; CHECK: error: cannot cast register 'EAX' to 'word'; size does not match
+
+mov qword ptr eax, rax
+; CHECK: error: cannot cast register 'EAX' to 'qword'; size does not match
+
+mov byte ptr rax, al
+; CHECK: error: cannot cast register 'RAX' to 'byte'; size does not match
+
+mov word ptr rax, ax
+; CHECK: error: cannot cast register 'RAX' to 'word'; size does not match
+
+mov dword ptr rax, eax
+; CHECK: error: cannot cast register 'RAX' to 'dword'; size does not match
+
+END

>From 5fbf8b6af4dbb6ee4171a31ef0e55460fcd98e09 Mon Sep 17 00:00:00 2001
From: Eric Astor <epastor at google.com>
Date: Mon, 24 Mar 2025 15:00:18 +0000
Subject: [PATCH 2/4] Fix includes

---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 475b0016ace68..46483b48e37d2 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -14,13 +14,12 @@
 #include "MCTargetDesc/X86TargetStreamer.h"
 #include "TargetInfo/X86TargetInfo.h"
 #include "X86Operand.h"
-#include "third_party/llvm/llvm-project/llvm/include/llvm/ADT/StringRef.h"
-#include "third_party/llvm/llvm-project/llvm/include/llvm/MC/MCRegister.h"
-#include "third_party/llvm/llvm-project/llvm/lib/Target/X86/X86RegisterInfo.h"
+#include "X86RegisterInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -31,6 +30,7 @@
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCRegister.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"

>From 2bbaea76071fc32e2c00eab97c31ac2d55dadaa7 Mon Sep 17 00:00:00 2001
From: Eric Astor <epastor at google.com>
Date: Mon, 24 Mar 2025 16:50:31 +0000
Subject: [PATCH 3/4] Fix include order

---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 46483b48e37d2..bedff30bc6483 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -18,8 +18,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -29,8 +29,8 @@
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCRegister.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"

>From edef7c18614b68434f02912fbbe3e783a42de65a Mon Sep 17 00:00:00 2001
From: Eric Astor <epastor at google.com>
Date: Mon, 24 Mar 2025 21:31:21 +0000
Subject: [PATCH 4/4] Address feedback

---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 45 +++++++++----------
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index bedff30bc6483..2ab859d9cc9b4 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2584,29 +2584,28 @@ bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size,
   return false;
 }
 
-uint16_t RegSizeInBits(MCRegister RegNo, const MCRegisterInfo &MRI) {
-  uint16_t Size = 0;
+/// Returns the width of \p RegNo in bits, or 0 if no class below contains it.
+static uint16_t RegSizeInBits(const MCRegisterInfo &MRI, MCRegister RegNo) {
   if (X86MCRegisterClasses[X86::GR8RegClassID].contains(RegNo))
-    Size = 8;
-  else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo))
-    Size = 16;
-  else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo))
-    Size = 32;
-  else if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
-    Size = 64;
-  else if (X86MCRegisterClasses[X86::RFP80RegClassID].contains(RegNo))
-    Size = 80;
-  else if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo))
-    Size = 128;
-  else if (X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo))
-    Size = 128;
-  else if (X86MCRegisterClasses[X86::VR256XRegClassID].contains(RegNo))
-    Size = 256;
-  else if (X86MCRegisterClasses[X86::VR512RegClassID].contains(RegNo))
-    Size = 512;
-  else
-    llvm_unreachable("Register without known register class");
-  return Size;
+    return 8;
+  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(RegNo))
+    return 16;
+  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(RegNo))
+    return 32;
+  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
+    return 64;
+  if (X86MCRegisterClasses[X86::RFP80RegClassID].contains(RegNo))
+    return 80;
+  if (X86MCRegisterClasses[X86::VR128RegClassID].contains(RegNo))
+    return 128;
+  if (X86MCRegisterClasses[X86::VR128XRegClassID].contains(RegNo))
+    return 128;
+  if (X86MCRegisterClasses[X86::VR256XRegClassID].contains(RegNo))
+    return 256;
+  if (X86MCRegisterClasses[X86::VR512RegClassID].contains(RegNo))
+    return 512;
+  // User input can name other registers (e.g. segment); caller reports those.
+  return 0;
 }
 
 bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
@@ -2641,7 +2640,7 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
 
         // If we are parsing MASM, we are allowed to cast registers to their own
         // sizes, but not to other types.
-        if (RegSizeInBits(RegNo, *getContext().getRegisterInfo()) != Size)
+        if (RegSizeInBits(*getContext().getRegisterInfo(), RegNo) != Size)
           return Error(
               Start,
               "cannot cast register '" +



More information about the llvm-commits mailing list