[llvm] MC: X86 intel syntax: Support data32 and data16 better (PR #156287)
Danny Milosavljevic via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 17:53:53 PDT 2025
daym (https://github.com/daym) updated the pull request https://github.com/llvm/llvm-project/pull/156287
>From dd6ec68b75a8dc3c8a78b0ea3135a753b5071076 Mon Sep 17 00:00:00 2001
From: Danny Milosavljevic <dannym at friendly-machines.com>
Date: Mon, 1 Sep 2025 05:21:06 +0200
Subject: [PATCH 1/2] [x86][MC] Fix data32 push.
---
.../lib/Target/X86/AsmParser/X86AsmParser.cpp | 20 +++++++++++++++----
llvm/test/MC/X86/x86-16-intel.s | 13 ++++++++++++
llvm/test/MC/X86/x86-16.s | 12 +++++++++++
3 files changed, 41 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/MC/X86/x86-16-intel.s
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d7671ed19589b..66acc6f9af860 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4538,14 +4538,26 @@ bool X86AsmParser::matchAndEmitIntelInstruction(
if (X86Op->isImm()) {
// If it's not a constant fall through and let remainder take care of it.
const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
- unsigned Size = getPointerWidth();
+ // Determine the operand size. A size forced by a 'data16' or 'data32'
+ // prefix (ForcedDataPrefix) takes priority; otherwise, fall back to the
+ // pointer width of the current mode.
+ unsigned Size = (ForcedDataPrefix == X86::Is32Bit) ? 32
+ : (ForcedDataPrefix == X86::Is16Bit) ? 16
+ : getPointerWidth();
+ ForcedDataPrefix = 0;
if (CE &&
(isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
SmallString<16> Tmp;
Tmp += Base;
- Tmp += (is64BitMode())
- ? "q"
- : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
+ // Append the suffix corresponding to the determined size.
+ if (Size == 64)
+ Tmp += "q";
+ else if (Size == 32)
+ Tmp += "l";
+ else if (Size == 16)
+ Tmp += "w";
+ else
+ Tmp += " ";
Op.setTokenValue(Tmp);
// Do match in ATT mode to allow explicit suffix usage.
Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
diff --git a/llvm/test/MC/X86/x86-16-intel.s b/llvm/test/MC/X86/x86-16-intel.s
new file mode 100644
index 0000000000000..77ae4ae217218
--- /dev/null
+++ b/llvm/test/MC/X86/x86-16-intel.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple i386-unknown-unknown-code16 --x86-asm-syntax=intel --show-encoding %s | FileCheck %s
+
+// CHECK: pushl $8
+// CHECK: encoding: [0x66,0x6a,0x08]
+ data32 push 8
+
+// CHECK: pushw $8
+// CHECK: encoding: [0x6a,0x08]
+ push 8
+
+// CHECK: lretl
+// CHECK: encoding: [0x66,0xcb]
+ data32 retf
diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s
index b0a4bda56fcbf..b4e116ab1a0fb 100644
--- a/llvm/test/MC/X86/x86-16.s
+++ b/llvm/test/MC/X86/x86-16.s
@@ -1060,3 +1060,15 @@ xresldtrk
// CHECK: encoding: [0x66,0x8b,0x1e,A,A]
// CHECK: fixup A - offset: 3, value: nearer, kind: FK_Data_2
movl nearer, %ebx
+
+// CHECK: pushl $8
+// CHECK: encoding: [0x66,0x6a,0x08]
+data32 push $8
+
+// CHECK: pushl $8
+// CHECK: encoding: [0x66,0x6a,0x08]
+pushl $8
+
+// CHECK: pushw $8
+// CHECK: encoding: [0x6a,0x08]
+push $8
>From 22edcc909fbbee01360e28a2bffcb624eb218df1 Mon Sep 17 00:00:00 2001
From: Danny Milosavljevic <dannym at friendly-machines.com>
Date: Mon, 1 Sep 2025 20:34:57 +0200
Subject: [PATCH 2/2] [x86][MC]: Fix data16.
---
.../lib/Target/X86/AsmParser/X86AsmParser.cpp | 31 +++++++++++++++++--
llvm/lib/Target/X86/X86InstrAsmAlias.td | 4 +++
llvm/lib/Target/X86/X86InstrSystem.td | 3 ++
llvm/test/MC/X86/intel-syntax-32.s | 29 ++++++++++++++++-
4 files changed, 64 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 66acc6f9af860..0c800d0e6152c 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3523,8 +3523,35 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName = Name;
// Hacks to handle 'data16' and 'data32'
- if (PatchedName == "data16" && is16BitMode()) {
- return Error(NameLoc, "redundant data16 prefix");
+ if (PatchedName == "data16") {
+ if (is16BitMode())
+ return Error(NameLoc, "redundant data16 prefix");
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ StringRef Next = Parser.getTok().getString();
+ getLexer().Lex();
+ // data16 effectively changes the suffix of the instruction that follows.
+ // TODO: Generalize this beyond the mnemonics special-cased below.
+ if (Next == "call")
+ Next = "callw";
+ if (Next == "ljmp")
+ Next = "ljmpw";
+ if (Next == "retf")
+ Next = "retfw";
+ if (Next == "lgdt") {
+ if (is64BitMode()) {
+ // Use the special lgdtq variant that carries the OpSize16 flag.
+ // The CPU presumably ignores the prefix here, but emit it as written.
+ Next = "lgdtq16";
+ } else {
+ Next = "lgdtw";
+ }
+ }
+
+ Name = Next;
+ PatchedName = Name;
+ ForcedDataPrefix = X86::Is16Bit;
+ IsPrefix = false;
+ }
}
if (PatchedName == "data32") {
if (is32BitMode())
diff --git a/llvm/lib/Target/X86/X86InstrAsmAlias.td b/llvm/lib/Target/X86/X86InstrAsmAlias.td
index 5a4c3f61672b3..55e4f65b01f09 100644
--- a/llvm/lib/Target/X86/X86InstrAsmAlias.td
+++ b/llvm/lib/Target/X86/X86InstrAsmAlias.td
@@ -260,6 +260,9 @@ def : MnemonicAlias<"cqo", "cqto", "att">;
// In 64-bit mode lret maps to lretl; it is not ambiguous with lretq.
def : MnemonicAlias<"lret", "lretw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>;
+def : InstAlias<"lgdtw\t$src", (LGDT16m opaquemem:$src), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"lgdtq16\t$src", (LGDT64m_16 opaquemem:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"retfw", (LRET16), 0>;
def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>;
def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
@@ -723,6 +726,7 @@ def : InstAlias<"shrd{l}\t{$reg, $mem|$mem, $reg}", (SHRD32mrCL i32mem:$mem, GR3
def : InstAlias<"shrd{q}\t{$reg, $mem|$mem, $reg}", (SHRD64mrCL i64mem:$mem, GR64:$reg), 0>;
// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"ljmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
def : InstAlias<"test{b}\t{$mem, $val|$val, $mem}",
(TEST8mr i8mem :$mem, GR8 :$val), 0>;
def : InstAlias<"test{w}\t{$mem, $val|$val, $mem}",
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index eb0b5a43afdf9..0fe6d854f7637 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -427,6 +427,9 @@ def LGDT32m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{l|d}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LGDT64m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
+// Special variant with data16 prefix (OpSize16) for 64-bit mode
+def LGDT64m_16 : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
+ "lgdt{q}\t$src", []>, TB, OpSize16, Requires<[In64BitMode]>;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidtw\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
diff --git a/llvm/test/MC/X86/intel-syntax-32.s b/llvm/test/MC/X86/intel-syntax-32.s
index a503a256ce213..1e93f6365f6cd 100644
--- a/llvm/test/MC/X86/intel-syntax-32.s
+++ b/llvm/test/MC/X86/intel-syntax-32.s
@@ -1,6 +1,33 @@
-// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel --show-encoding %s | FileCheck %s
// CHECK: leaw (%bp,%si), %ax
lea ax, [bp+si]
// CHECK: leaw (%bp,%si), %ax
lea ax, [si+bp]
+
+// CHECK: encoding: [0x66,0x6a,0x08]
+ data16 push 8
+
+// CHECK: encoding: [0x6a,0x08]
+ push 8
+
+// CHECK: encoding: [0x66,0xcb]
+ data16 retf
+
+// CHECK: encoding: [0xcb]
+ retf
+
+// CHECK: encoding: [0x66,0x9a,0xcd,0xab,0xce,0x7a]
+ data16 call 0x7ace, 0xabcd
+
+// CHECK: encoding: [0x9a,0xcd,0xab,0x00,0x00,0xce,0x7a]
+ call 0x7ace, 0xabcd
+
+// CHECK: encoding: [0xe8,A,A,A,A]
+ call a
+
+// CHECK: encoding: [0x66,0xea,0xcd,0xab,0xce,0x7a]
+ data16 ljmp 0x7ace, 0xabcd
+
+// CHECK: encoding: [0xea,0xcd,0xab,0x00,0x00,0xce,0x7a]
+ ljmp 0x7ace, 0xabcd
More information about the llvm-commits
mailing list