[llvm] [AArch64][llvm] Disassemble instructions in `SYS` alias encoding space more correctly (PR #153905)

Jonathan Thackray via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 18 06:07:15 PDT 2025


https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/153905

>From 9a0636a4a77c000c03065ea020be01141adb3234 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Sat, 16 Aug 2025 01:19:52 +0100
Subject: [PATCH 1/3] [AArch64][llvm] Add support for optional register in
 `SYS` alias instructions

Add support for future AArch64 instructions in the `SYS` alias encoding
space which may support an optional register as an operand. For example:

```
   SYS #4, c8, c5, #2, {<Xt>}
```

Currently, AArch64 `SYS` alias instructions fall into two categories:
  * a register value must be present (indicated by any value except `XZR`)
  * no register value must be present (this value must be `XZR`)

This is defined by the optional "needsreg" parameter in TableGen (e.g. see
the multiclass definition for TLBI), which defaults to requiring a register
operand for the instruction to be valid.

Expand the code so that if an optional register operand is specified
in TableGen, then if it's not `XZR`, encode this value in the `SYS` alias
and for disassembly, print this value if it's not `XZR`. Don't produce an
error message if the register operand is missing or unexpected, if it is
specified as an optional register.

If a mandatory or optional register is not specified in the TableGen
(i.e. no register operand should be present), then disassemble to a SYS
alias instead if the register value is not xzr/x31 (encoded as 0b11111).

For instructions taking no register operands, this is specified in the
Arm ARM, with language similar to:
```
  Rt should be encoded as 0b11111. If the Rt field is not set to 0b11111,
  it is CONSTRAINED UNPREDICTABLE whether:
    * The instruction is UNDEFINED.
    * The instruction behaves as if the Rt field is set to 0b11111.
```
and since we want to follow "should" directives, and not encourage
undefined behaviour, these are not considered valid instructions, so
should not be assembled or disassembled as such.
---
 .../AArch64/AsmParser/AArch64AsmParser.cpp     | 13 ++++++++-----
 .../MCTargetDesc/AArch64InstPrinter.cpp        | 18 +++++++++++++++---
 .../lib/Target/AArch64/Utils/AArch64BaseInfo.h | 10 ++++++++++
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1ca61f5c6b349..763a5bfcb0c62 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3908,6 +3908,8 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
   StringRef Op = Tok.getString();
   SMLoc S = Tok.getLoc();
   bool ExpectRegister = true;
+  bool OptionalRegister = false;
+  bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
 
   if (Mnemonic == "ic") {
     const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
@@ -3956,7 +3958,6 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
     if (Op.lower() != "rctx")
       return TokError("invalid operand for prediction restriction instruction");
 
-    bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
     bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes);
     bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2);
 
@@ -3989,10 +3990,12 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
     HasRegister = true;
   }
 
-  if (ExpectRegister && !HasRegister)
-    return TokError("specified " + Mnemonic + " op requires a register");
-  else if (!ExpectRegister && HasRegister)
-    return TokError("specified " + Mnemonic + " op does not use a register");
+  if (!OptionalRegister) {
+    if (ExpectRegister && !HasRegister)
+      return TokError("specified " + Mnemonic + " op requires a register");
+    else if (!ExpectRegister && HasRegister)
+      return TokError("specified " + Mnemonic + " op does not use a register");
+  }
 
   if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
     return true;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 3c8b5712c1f0c..ae277e5b18b8c 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -917,6 +917,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
   Encoding |= Op1Val << 11;
 
   bool NeedsReg;
+  bool OptionalReg = false;
   std::string Ins;
   std::string Name;
 
@@ -1017,13 +1018,24 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
   else
     return false;
 
+  StringRef Reg = getRegisterName(MI->getOperand(4).getReg());
+  bool NotXZR = Reg != "xzr";
+
+  // If a mandatory or optional register is not specified in the TableGen
+  // (i.e. no register operand should be present), and the register value
+  // is not xzr/x31, then disassemble to a SYS alias instead.
+  if (NotXZR && !NeedsReg && !OptionalReg)
+    return false;
+
   std::string Str = Ins + Name;
   llvm::transform(Str, Str.begin(), ::tolower);
 
   O << '\t' << Str;
-  if (NeedsReg) {
-    O << ", ";
-    printRegName(O, MI->getOperand(4).getReg());
+
+  // For optional registers, don't print the value if it's xzr/x31
+  // since this defaults to xzr/x31 if register is not specified.
+  if (NeedsReg || (OptionalReg && NotXZR)) {
+    O << ", " << Reg;
   }
 
   return true;
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index a4ee963e2cce0..857a3c73f1cc0 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -409,6 +409,16 @@ struct SysAliasReg : SysAlias {
       : SysAlias(N, E, F), NeedsReg(R) {}
 };
 
+struct SysAliasOptionalReg : SysAlias {
+  bool NeedsReg;
+  bool OptionalReg;
+  constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O)
+      : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {}
+  constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O,
+                                FeatureBitset F)
+      : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {}
+};
+
 struct SysAliasImm : SysAlias {
   uint16_t ImmValue;
   constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)

>From 4180996d619c1e3f933a8072b3fe14959cb8b752 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Mon, 18 Aug 2025 13:34:28 +0100
Subject: [PATCH 2/3] fixup! Address CR comments and remove optional register
 code (will push this later)

---
 .../Target/AArch64/AsmParser/AArch64AsmParser.cpp  | 13 +++++--------
 .../AArch64/MCTargetDesc/AArch64InstPrinter.cpp    |  9 +++------
 llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h    | 10 ----------
 llvm/test/MC/AArch64/arm64-aliases.s               | 14 ++++++++++++++
 4 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 763a5bfcb0c62..1ca61f5c6b349 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3908,8 +3908,6 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
   StringRef Op = Tok.getString();
   SMLoc S = Tok.getLoc();
   bool ExpectRegister = true;
-  bool OptionalRegister = false;
-  bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
 
   if (Mnemonic == "ic") {
     const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
@@ -3958,6 +3956,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
     if (Op.lower() != "rctx")
       return TokError("invalid operand for prediction restriction instruction");
 
+    bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
     bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes);
     bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2);
 
@@ -3990,12 +3989,10 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
     HasRegister = true;
   }
 
-  if (!OptionalRegister) {
-    if (ExpectRegister && !HasRegister)
-      return TokError("specified " + Mnemonic + " op requires a register");
-    else if (!ExpectRegister && HasRegister)
-      return TokError("specified " + Mnemonic + " op does not use a register");
-  }
+  if (ExpectRegister && !HasRegister)
+    return TokError("specified " + Mnemonic + " op requires a register");
+  else if (!ExpectRegister && HasRegister)
+    return TokError("specified " + Mnemonic + " op does not use a register");
 
   if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
     return true;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ae277e5b18b8c..5939c87992970 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -917,7 +917,6 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
   Encoding |= Op1Val << 11;
 
   bool NeedsReg;
-  bool OptionalReg = false;
   std::string Ins;
   std::string Name;
 
@@ -1021,10 +1020,10 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
   StringRef Reg = getRegisterName(MI->getOperand(4).getReg());
   bool NotXZR = Reg != "xzr";
 
-  // If a mandatory or optional register is not specified in the TableGen
+  // If a mandatory is not specified in the TableGen
   // (i.e. no register operand should be present), and the register value
   // is not xzr/x31, then disassemble to a SYS alias instead.
-  if (NotXZR && !NeedsReg && !OptionalReg)
+  if (NotXZR && !NeedsReg)
     return false;
 
   std::string Str = Ins + Name;
@@ -1032,9 +1031,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
 
   O << '\t' << Str;
 
-  // For optional registers, don't print the value if it's xzr/x31
-  // since this defaults to xzr/x31 if register is not specified.
-  if (NeedsReg || (OptionalReg && NotXZR)) {
+  if (NeedsReg) {
     O << ", " << Reg;
   }
 
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 857a3c73f1cc0..a4ee963e2cce0 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -409,16 +409,6 @@ struct SysAliasReg : SysAlias {
       : SysAlias(N, E, F), NeedsReg(R) {}
 };
 
-struct SysAliasOptionalReg : SysAlias {
-  bool NeedsReg;
-  bool OptionalReg;
-  constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O)
-      : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {}
-  constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O,
-                                FeatureBitset F)
-      : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {}
-};
-
 struct SysAliasImm : SysAlias {
   uint16_t ImmValue;
   constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
diff --git a/llvm/test/MC/AArch64/arm64-aliases.s b/llvm/test/MC/AArch64/arm64-aliases.s
index 3ace7a0f7183b..ae157c676c95f 100644
--- a/llvm/test/MC/AArch64/arm64-aliases.s
+++ b/llvm/test/MC/AArch64/arm64-aliases.s
@@ -512,6 +512,20 @@ foo:
   sys #4, c8, c3, #6
 ; CHECK: tlbi vmalls12e1is
 
+; Check that all 5 register bits are set (0x31):
+;    (from Arm ARM regarding TLBI instructions without operands)
+;    "Rt should be encoded as 0b11111. If the Rt field is not set to 0b11111,
+;     it is CONSTRAINED UNPREDICTABLE whether:
+;       * The instruction is UNDEFINED.
+;       * The instruction behaves as if the Rt field is set to 0b11111."
+;
+; Do not disassemble this to `tlbi` but a SYS alias instead
+;
+  sys #4, c8, c7, #6, x30
+; CHECK: sys #0x4, c8, c7, #0x6, x30
+  sys #4, c8, c7, #6, x31
+; CHECK: tlbi vmalls12e1
+
   ic ialluis
 ; CHECK: ic ialluis                 ; encoding: [0x1f,0x71,0x08,0xd5]
   ic iallu

>From fc40f88b0f54fcb3ea77359fee001dc97f5d3b53 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Mon, 18 Aug 2025 14:06:58 +0100
Subject: [PATCH 3/3] fixup! Fix CR comment

---
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 5939c87992970..54b58e948daf2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1031,9 +1031,8 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
 
   O << '\t' << Str;
 
-  if (NeedsReg) {
+  if (NeedsReg)
     O << ", " << Reg;
-  }
 
   return true;
 }



More information about the llvm-commits mailing list