[llvm] [X86] Fix misassemble due to not storing registers to state machine on RParen (PR #150252)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 23 09:05:30 PDT 2025


https://github.com/Heath123 created https://github.com/llvm/llvm-project/pull/150252

This fixes #116883.

The x86 parser saves any register it encounters to a TmpReg field in its state machine. When it encounters the next valid token, it then stores that register in BaseReg, or in IndexReg if BaseReg is already filled. However, this saving logic was missing from the RParen token handler, causing the parser to "forget" a register that immediately preceded a closing parenthesis. This would also prevent later validation logic from detecting the addressing mode as invalid, leading to a silent misassembly rather than an error.


>From 0e558e09ca60117b4949e2fe4dfc9ddd22b90516 Mon Sep 17 00:00:00 2001
From: Heath Mitchell <heath.mitchell at sony.com>
Date: Wed, 23 Jul 2025 13:22:23 +0100
Subject: [PATCH] [X86] Fix misassemble due to not storing registers to state
 machine on RParen

This fixes #116883.

The x86 parser saves any register it encounters to a TmpReg field in its state
machine. When it encounters the next valid token, it then stores that register
in BaseReg, or in IndexReg if BaseReg is already filled. However, this saving
logic was missing from the RParen token handler, causing the parser to "forget"
a register that immediately preceded a closing parenthesis. This would also
prevent later validation logic from detecting the addressing mode as invalid,
leading to a silent misassembly rather than an error.
---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 28 +++++++++++++++++--
 llvm/test/MC/X86/intel-syntax-parentheses.s   | 10 +++++++
 2 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/MC/X86/intel-syntax-parentheses.s

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b642c1cfe383b..17a69e0ce0f81 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1042,8 +1042,8 @@ class X86AsmParser : public MCTargetAsmParser {
       }
       PrevState = CurrState;
     }
-    void onRParen() {
-      PrevState = State;
+    bool onRParen(StringRef &ErrMsg) {
+      IntelExprState CurrState = State;
       switch (State) {
       default:
         State = IES_ERROR;
@@ -1054,9 +1054,27 @@ class X86AsmParser : public MCTargetAsmParser {
       case IES_RBRAC:
       case IES_RPAREN:
         State = IES_RPAREN;
+        // In the case of a multiply, onRegister has already set IndexReg
+        // directly, with appropriate scale
+        // Otherwise if we just saw a register it has only been stored in
+        // TmpReg, so we need to store it into the state machine
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // no explicit scale.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            if (IndexReg)
+              return regsUseUpError(ErrMsg);
+            IndexReg = TmpReg;
+            Scale = 0;
+          }
+        }
         IC.pushOperator(IC_RPAREN);
         break;
       }
+      PrevState = CurrState;
+      return false;
     }
     bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                   const InlineAsmIdentifierInfo &IDInfo,
@@ -2172,7 +2190,11 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       }
       break;
     case AsmToken::LParen:  SM.onLParen(); break;
-    case AsmToken::RParen:  SM.onRParen(); break;
+    case AsmToken::RParen:
+      if (SM.onRParen(ErrMsg)) {
+        return Error(Tok.getLoc(), ErrMsg);
+      }
+      break;
     }
     if (SM.hadError())
       return Error(Tok.getLoc(), "unknown token in expression");
diff --git a/llvm/test/MC/X86/intel-syntax-parentheses.s b/llvm/test/MC/X86/intel-syntax-parentheses.s
new file mode 100644
index 0000000000000..ae53f64089070
--- /dev/null
+++ b/llvm/test/MC/X86/intel-syntax-parentheses.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2>&1 | FileCheck %s
+
+.intel_syntax
+
+// CHECK: error: invalid base+index expression
+    lea     rdi, [(label + rsi) + rip]
+// CHECK: leaq    1(%rax,%rdi), %rdi
+    lea     rdi, [(rax + rdi) + 1]
+label:
+    .quad 42



More information about the llvm-commits mailing list