[llvm] [X86][MC][AsmParser] Reject H-byte regs with VEX/EVEX-encoded 8-bit RR (NDD) (PR #160039)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 23 19:53:03 PDT 2025


https://github.com/woruyu updated https://github.com/llvm/llvm-project/pull/160039

>From 2c3351100415f78d5bc333408ca58dc0d95e33b3 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Sun, 21 Sep 2025 22:05:05 -0900
Subject: [PATCH 1/4] [X86][MC][AsmParser] Reject H-byte regs with
 VEX/EVEX-encoded 8-bit RR (NDD)

---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 18 ++++++++++++++++++
 llvm/test/MC/X86/encoder-fail-VEX-EVEX.s       | 16 ++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 llvm/test/MC/X86/encoder-fail-VEX-EVEX.s

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ce5e92135f706..d1ebe6bdb2dfd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4044,6 +4044,24 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
     }
   }
 
+  unsigned Enc = TSFlags & X86II::EncodingMask;
+  if (Enc == X86II::VEX || Enc == X86II::EVEX || Enc == X86II::XOP) {
+    unsigned NumOps = Inst.getNumOperands();
+    for (unsigned i = 0; i != NumOps; ++i) {
+      const MCOperand &MO = Inst.getOperand(i);
+      if (!MO.isReg())
+        continue;
+      MCRegister Reg = MO.getReg();
+      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH ||
+          Reg == X86::DH) {
+        StringRef RegName = X86IntelInstPrinter::getRegisterName(Reg);
+        return Error(Ops[0]->getStartLoc(),
+                     "can't encode '" + RegName +
+                         "' in a VEX/EVEX-prefixed instruction");
+      }
+    }
+  }
+
   if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
     const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
     if (!MO.isReg() || MO.getReg() != X86::RIP)
diff --git a/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s b/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
new file mode 100644
index 0000000000000..90512741c9c6c
--- /dev/null
+++ b/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
@@ -0,0 +1,16 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel --show-encoding %s 2>&1 | FileCheck %s
+
+// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
+add ah, ah, ah
+
+// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
+and ah, byte ptr [-13426159], ah
+
+// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
+ccmpa {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
+
+// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
+ccmpae {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
+
+// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
+sar ah, byte ptr [-13426159]
\ No newline at end of file

>From 7338d2e886da17679350cd25ea0f67b9908c34f0 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Sun, 21 Sep 2025 22:16:15 -0900
Subject: [PATCH 2/4] fix: details

---
 llvm/test/MC/X86/encoder-fail-VEX-EVEX.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s b/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
index 90512741c9c6c..bd6efddd46901 100644
--- a/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
+++ b/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
@@ -13,4 +13,4 @@ ccmpa {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
 ccmpae {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
 
 // CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
-sar ah, byte ptr [-13426159]
\ No newline at end of file
+sar ah, byte ptr [-13426159]

>From 154f9762f374ba3f4d47478a3bca0247abfbdddc Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Mon, 22 Sep 2025 00:01:08 -0900
Subject: [PATCH 3/4] fix: review

---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp |  5 ++---
 llvm/test/MC/X86/encoder-fail-EVEX.s           | 16 ++++++++++++++++
 llvm/test/MC/X86/encoder-fail-VEX-EVEX.s       | 16 ----------------
 3 files changed, 18 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/MC/X86/encoder-fail-EVEX.s
 delete mode 100644 llvm/test/MC/X86/encoder-fail-VEX-EVEX.s

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d1ebe6bdb2dfd..adce643073b09 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4044,8 +4044,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
     }
   }
 
-  unsigned Enc = TSFlags & X86II::EncodingMask;
-  if (Enc == X86II::VEX || Enc == X86II::EVEX || Enc == X86II::XOP) {
+  if ((TSFlags & X86II::EncodingMask) == X86II::EVEX) {
     unsigned NumOps = Inst.getNumOperands();
     for (unsigned i = 0; i != NumOps; ++i) {
       const MCOperand &MO = Inst.getOperand(i);
@@ -4057,7 +4056,7 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
         StringRef RegName = X86IntelInstPrinter::getRegisterName(Reg);
         return Error(Ops[0]->getStartLoc(),
                      "can't encode '" + RegName +
-                         "' in a VEX/EVEX-prefixed instruction");
+                         "' in a EVEX-prefixed instruction");
       }
     }
   }
diff --git a/llvm/test/MC/X86/encoder-fail-EVEX.s b/llvm/test/MC/X86/encoder-fail-EVEX.s
new file mode 100644
index 0000000000000..578a730f1ae5f
--- /dev/null
+++ b/llvm/test/MC/X86/encoder-fail-EVEX.s
@@ -0,0 +1,16 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel --show-encoding %s 2>&1 | FileCheck %s
+
+// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
+add ah, ah, ah
+
+// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
+and ah, byte ptr [-13426159], ah
+
+// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
+ccmpa {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
+
+// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
+ccmpae {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
+
+// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
+sar ah, byte ptr [-13426159]
diff --git a/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s b/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
deleted file mode 100644
index bd6efddd46901..0000000000000
--- a/llvm/test/MC/X86/encoder-fail-VEX-EVEX.s
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel --show-encoding %s 2>&1 | FileCheck %s
-
-// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
-add ah, ah, ah
-
-// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
-and ah, byte ptr [-13426159], ah
-
-// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
-ccmpa {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
-
-// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
-ccmpae {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
-
-// CHECK: error: can't encode 'ah' in a VEX/EVEX-prefixed instruction
-sar ah, byte ptr [-13426159]

>From 4ae1e957f4ecc293772ae7e72170b3526f556c10 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Tue, 23 Sep 2025 17:52:45 -0900
Subject: [PATCH 4/4] fix: review

---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 35 ++++++++-----------
 llvm/test/MC/X86/encoder-fail-EVEX.s          | 16 ---------
 llvm/test/MC/X86/encoder-fail.s               | 19 ++++++++++
 3 files changed, 33 insertions(+), 37 deletions(-)
 delete mode 100644 llvm/test/MC/X86/encoder-fail-EVEX.s

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index adce643073b09..5232ebc8c878b 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4020,7 +4020,10 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
 
   // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
   // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
-  if ((TSFlags & X86II::EncodingMask) == 0) {
+  const unsigned Enc = TSFlags & X86II::EncodingMask;
+  const bool UsesEvex = (Enc == X86II::EVEX);
+  const bool UsesRex2 = (ForcedOpcodePrefix == OpcodePrefix_REX2);
+  if (Enc == 0 || UsesEvex || UsesRex2) {
     MCRegister HReg;
     bool UsesRex = TSFlags & X86II::REX_W;
     unsigned NumOps = Inst.getNumOperands();
@@ -4031,33 +4034,23 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
       MCRegister Reg = MO.getReg();
       if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
         HReg = Reg;
-      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
-          X86II::isX86_64ExtendedReg(Reg))
+      if (Enc == 0 && (X86II::isX86_64NonExtLowByteReg(Reg) ||
+                       X86II::isX86_64ExtendedReg(Reg)))
         UsesRex = true;
     }
 
-    if (UsesRex && HReg) {
+    if (Enc == 0 && UsesRex && HReg) {
       StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
       return Error(Ops[0]->getStartLoc(),
-                   "can't encode '" + RegName + "' in an instruction requiring "
-                   "REX prefix");
+                   "can't encode '" + RegName +
+                       "' in an instruction requiring REX prefix");
     }
-  }
 
-  if ((TSFlags & X86II::EncodingMask) == X86II::EVEX) {
-    unsigned NumOps = Inst.getNumOperands();
-    for (unsigned i = 0; i != NumOps; ++i) {
-      const MCOperand &MO = Inst.getOperand(i);
-      if (!MO.isReg())
-        continue;
-      MCRegister Reg = MO.getReg();
-      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH ||
-          Reg == X86::DH) {
-        StringRef RegName = X86IntelInstPrinter::getRegisterName(Reg);
-        return Error(Ops[0]->getStartLoc(),
-                     "can't encode '" + RegName +
-                         "' in a EVEX-prefixed instruction");
-      }
+    if ((UsesEvex || UsesRex2) && HReg) {
+      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
+      return Error(Ops[0]->getStartLoc(),
+                   "can't encode '" + RegName.str() +
+                       "' in an instruction requiring EVEX/REX2 prefix");
     }
   }
 
diff --git a/llvm/test/MC/X86/encoder-fail-EVEX.s b/llvm/test/MC/X86/encoder-fail-EVEX.s
deleted file mode 100644
index 578a730f1ae5f..0000000000000
--- a/llvm/test/MC/X86/encoder-fail-EVEX.s
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel --show-encoding %s 2>&1 | FileCheck %s
-
-// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
-add ah, ah, ah
-
-// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
-and ah, byte ptr [-13426159], ah
-
-// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
-ccmpa {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
-
-// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
-ccmpae {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
-
-// CHECK: error: can't encode 'ah' in a EVEX-prefixed instruction
-sar ah, byte ptr [-13426159]
diff --git a/llvm/test/MC/X86/encoder-fail.s b/llvm/test/MC/X86/encoder-fail.s
index a8b9f48c8fb70..86f4e37e96375 100644
--- a/llvm/test/MC/X86/encoder-fail.s
+++ b/llvm/test/MC/X86/encoder-fail.s
@@ -1,4 +1,5 @@
 // RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding -x86-asm-syntax=intel %s 2>&1 | FileCheck %s --check-prefix=CHECK-INTEL
 
 // CHECK: error: can't encode 'dh' in an instruction requiring REX prefix
 movzx %dh, %rsi
@@ -14,3 +15,21 @@ mov %ch, (%r8)
 
 // CHECK: error: can't encode 'dh' in an instruction requiring REX prefix
 mov %dh, (%rax,%r8)
+
+// CHECK-INTEL: error: can't encode 'ah' in an instruction requiring EVEX/REX2 prefix
+add ah, ah, ah
+
+// CHECK-INTEL: error: can't encode 'ah' in an instruction requiring EVEX/REX2 prefix
+and ah, byte ptr [-13426159], ah
+
+// CHECK-INTEL: error: can't encode 'ah' in an instruction requiring EVEX/REX2 prefix
+ccmpa {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
+
+// CHECK-INTEL: error: can't encode 'ah' in an instruction requiring EVEX/REX2 prefix
+ccmpae {dfv=of,cf} byte ptr [r8 + 4*rax + 291], ah
+
+// CHECK-INTEL: error: can't encode 'ah' in an instruction requiring EVEX/REX2 prefix
+sar ah, byte ptr [-13426159]
+
+// CHECK-INTEL: error: can't encode 'ah' in an instruction requiring EVEX/REX2 prefix
+{rex2} add ah, al



More information about the llvm-commits mailing list