[llvm] [AArch64][llvm] Disassemble instructions in `SYS` alias encoding space more correctly (PR #153905)
Jonathan Thackray via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 06:07:15 PDT 2025
https://github.com/jthackray updated https://github.com/llvm/llvm-project/pull/153905
>From 9a0636a4a77c000c03065ea020be01141adb3234 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Sat, 16 Aug 2025 01:19:52 +0100
Subject: [PATCH 1/3] [AArch64][llvm] Add support for optional register in
`SYS` alias instructions
Add support for future AArch64 instructions in the `SYS` alias encoding
space which may support an optional register as an operand. For example:
```
SYS #4, c8, c5, #2, {<Xt>}
```
Currently, AArch64 `SYS` alias instructions fall into two categories:
* a register value must be present (indicated by any value except `XZR`)
* no register value must be present (this value must be `XZR`)
This is defined by the optional "needsreg" parameter in TableGen (e.g. see
the multiclass definition for TLBI), which defaults to requiring a register
operand for the instruction to be valid.
Expand the code so that if an optional register operand is specified
in TableGen, then if it's not `XZR`, encode this value in the `SYS` alias
and for disassembly, print this value if it's not `XZR`. Don't produce an
error message if the register operand is missing or unexpected, if it is
specified as an optional register.
If a mandatory or optional register is not specified in the TableGen
(i.e. no register operand should be present), then disassemble to a SYS
alias instead if the register value is not xzr/x31 (encoded as 0b11111).
For instructions taking no register operands, this is specified in the
Arm ARM, with language similar to:
```
Rt should be encoded as 0b11111. If the Rt field is not set to 0b11111,
it is CONSTRAINED UNPREDICTABLE whether:
* The instruction is UNDEFINED.
* The instruction behaves as if the Rt field is set to 0b11111.
```
and since we want to follow "should" directives, and not encourage
undefined behaviour, these are not considered valid instructions, so
should not be assembled or disassembled as such.
---
.../AArch64/AsmParser/AArch64AsmParser.cpp | 13 ++++++++-----
.../MCTargetDesc/AArch64InstPrinter.cpp | 18 +++++++++++++++---
.../lib/Target/AArch64/Utils/AArch64BaseInfo.h | 10 ++++++++++
3 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1ca61f5c6b349..763a5bfcb0c62 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3908,6 +3908,8 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
bool ExpectRegister = true;
+ bool OptionalRegister = false;
+ bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
if (Mnemonic == "ic") {
const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
@@ -3956,7 +3958,6 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (Op.lower() != "rctx")
return TokError("invalid operand for prediction restriction instruction");
- bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes);
bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2);
@@ -3989,10 +3990,12 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (ExpectRegister && !HasRegister)
- return TokError("specified " + Mnemonic + " op requires a register");
- else if (!ExpectRegister && HasRegister)
- return TokError("specified " + Mnemonic + " op does not use a register");
+ if (!OptionalRegister) {
+ if (ExpectRegister && !HasRegister)
+ return TokError("specified " + Mnemonic + " op requires a register");
+ else if (!ExpectRegister && HasRegister)
+ return TokError("specified " + Mnemonic + " op does not use a register");
+ }
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 3c8b5712c1f0c..ae277e5b18b8c 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -917,6 +917,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
Encoding |= Op1Val << 11;
bool NeedsReg;
+ bool OptionalReg = false;
std::string Ins;
std::string Name;
@@ -1017,13 +1018,24 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
else
return false;
+ StringRef Reg = getRegisterName(MI->getOperand(4).getReg());
+ bool NotXZR = Reg != "xzr";
+
+ // If a mandatory or optional register is not specified in the TableGen
+ // (i.e. no register operand should be present), and the register value
+ // is not xzr/x31, then disassemble to a SYS alias instead.
+ if (NotXZR && !NeedsReg && !OptionalReg)
+ return false;
+
std::string Str = Ins + Name;
llvm::transform(Str, Str.begin(), ::tolower);
O << '\t' << Str;
- if (NeedsReg) {
- O << ", ";
- printRegName(O, MI->getOperand(4).getReg());
+
+ // For optional registers, don't print the value if it's xzr/x31
+ // since this defaults to xzr/x31 if register is not specified.
+ if (NeedsReg || (OptionalReg && NotXZR)) {
+ O << ", " << Reg;
}
return true;
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index a4ee963e2cce0..857a3c73f1cc0 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -409,6 +409,16 @@ struct SysAliasReg : SysAlias {
: SysAlias(N, E, F), NeedsReg(R) {}
};
+struct SysAliasOptionalReg : SysAlias {
+ bool NeedsReg;
+ bool OptionalReg;
+ constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O)
+ : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {}
+ constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O,
+ FeatureBitset F)
+ : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {}
+};
+
struct SysAliasImm : SysAlias {
uint16_t ImmValue;
constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
>From 4180996d619c1e3f933a8072b3fe14959cb8b752 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Mon, 18 Aug 2025 13:34:28 +0100
Subject: [PATCH 2/3] fixup! Address CR comments and remove optional register
code (will push this later)
---
.../Target/AArch64/AsmParser/AArch64AsmParser.cpp | 13 +++++--------
.../AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 9 +++------
llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 10 ----------
llvm/test/MC/AArch64/arm64-aliases.s | 14 ++++++++++++++
4 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 763a5bfcb0c62..1ca61f5c6b349 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3908,8 +3908,6 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
bool ExpectRegister = true;
- bool OptionalRegister = false;
- bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
if (Mnemonic == "ic") {
const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
@@ -3958,6 +3956,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
if (Op.lower() != "rctx")
return TokError("invalid operand for prediction restriction instruction");
+ bool hasAll = getSTI().hasFeature(AArch64::FeatureAll);
bool hasPredres = hasAll || getSTI().hasFeature(AArch64::FeaturePredRes);
bool hasSpecres2 = hasAll || getSTI().hasFeature(AArch64::FeatureSPECRES2);
@@ -3990,12 +3989,10 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (!OptionalRegister) {
- if (ExpectRegister && !HasRegister)
- return TokError("specified " + Mnemonic + " op requires a register");
- else if (!ExpectRegister && HasRegister)
- return TokError("specified " + Mnemonic + " op does not use a register");
- }
+ if (ExpectRegister && !HasRegister)
+ return TokError("specified " + Mnemonic + " op requires a register");
+ else if (!ExpectRegister && HasRegister)
+ return TokError("specified " + Mnemonic + " op does not use a register");
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ae277e5b18b8c..5939c87992970 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -917,7 +917,6 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
Encoding |= Op1Val << 11;
bool NeedsReg;
- bool OptionalReg = false;
std::string Ins;
std::string Name;
@@ -1021,10 +1020,10 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
StringRef Reg = getRegisterName(MI->getOperand(4).getReg());
bool NotXZR = Reg != "xzr";
- // If a mandatory or optional register is not specified in the TableGen
+ // If a mandatory is not specified in the TableGen
// (i.e. no register operand should be present), and the register value
// is not xzr/x31, then disassemble to a SYS alias instead.
- if (NotXZR && !NeedsReg && !OptionalReg)
+ if (NotXZR && !NeedsReg)
return false;
std::string Str = Ins + Name;
@@ -1032,9 +1031,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
O << '\t' << Str;
- // For optional registers, don't print the value if it's xzr/x31
- // since this defaults to xzr/x31 if register is not specified.
- if (NeedsReg || (OptionalReg && NotXZR)) {
+ if (NeedsReg) {
O << ", " << Reg;
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 857a3c73f1cc0..a4ee963e2cce0 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -409,16 +409,6 @@ struct SysAliasReg : SysAlias {
: SysAlias(N, E, F), NeedsReg(R) {}
};
-struct SysAliasOptionalReg : SysAlias {
- bool NeedsReg;
- bool OptionalReg;
- constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O)
- : SysAlias(N, E), NeedsReg(R), OptionalReg(O) {}
- constexpr SysAliasOptionalReg(const char *N, uint16_t E, bool R, bool O,
- FeatureBitset F)
- : SysAlias(N, E, F), NeedsReg(R), OptionalReg(O) {}
-};
-
struct SysAliasImm : SysAlias {
uint16_t ImmValue;
constexpr SysAliasImm(const char *N, uint16_t E, uint16_t I)
diff --git a/llvm/test/MC/AArch64/arm64-aliases.s b/llvm/test/MC/AArch64/arm64-aliases.s
index 3ace7a0f7183b..ae157c676c95f 100644
--- a/llvm/test/MC/AArch64/arm64-aliases.s
+++ b/llvm/test/MC/AArch64/arm64-aliases.s
@@ -512,6 +512,20 @@ foo:
sys #4, c8, c3, #6
; CHECK: tlbi vmalls12e1is
+; Check that all 5 register bits are set (0x31):
+; (from Arm ARM regarding TLBI instructions without operands)
+; "Rt should be encoded as 0b11111. If the Rt field is not set to 0b11111,
+; it is CONSTRAINED UNPREDICTABLE whether:
+; * The instruction is UNDEFINED.
+; * The instruction behaves as if the Rt field is set to 0b11111."
+;
+; Do not disassemble this to `tlbi` but a SYS alias instead
+;
+ sys #4, c8, c7, #6, x30
+; CHECK: sys #0x4, c8, c7, #0x6, x30
+ sys #4, c8, c7, #6, x31
+; CHECK: tlbi vmalls12e1
+
ic ialluis
; CHECK: ic ialluis ; encoding: [0x1f,0x71,0x08,0xd5]
ic iallu
>From fc40f88b0f54fcb3ea77359fee001dc97f5d3b53 Mon Sep 17 00:00:00 2001
From: Jonathan Thackray <jonathan.thackray at arm.com>
Date: Mon, 18 Aug 2025 14:06:58 +0100
Subject: [PATCH 3/3] fixup! Fix CR comment
---
llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 5939c87992970..54b58e948daf2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1031,9 +1031,8 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
O << '\t' << Str;
- if (NeedsReg) {
+ if (NeedsReg)
O << ", " << Reg;
- }
return true;
}
More information about the llvm-commits
mailing list