[llvm] 36978fa - [MC] Add UseAtForSpecifier
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 1 00:06:10 PDT 2025
Author: Fangrui Song
Date: 2025-04-01T00:06:05-07:00
New Revision: 36978fadb8e14c944b71fa63b876012cb2c444c2
URL: https://github.com/llvm/llvm-project/commit/36978fadb8e14c944b71fa63b876012cb2c444c2
DIFF: https://github.com/llvm/llvm-project/commit/36978fadb8e14c944b71fa63b876012cb2c444c2.diff
LOG: [MC] Add UseAtForSpecifier
Some ELF targets don't use @ for relocation specifiers.
We should not report `error: invalid variant` when @ is used.
Attempt to make expr@specifier parsing less hacky.
Added:
Modified:
llvm/include/llvm/MC/MCAsmInfo.h
llvm/include/llvm/MC/MCParser/MCAsmLexer.h
llvm/lib/MC/MCExpr.cpp
llvm/lib/MC/MCParser/AsmLexer.cpp
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/MC/MCParser/ELFAsmParser.cpp
llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
llvm/test/MC/RISCV/pseudo-jump-invalid.s
llvm/test/MC/RISCV/rv32i-aliases-invalid.s
llvm/test/MC/RISCV/rv64i-aliases-invalid.s
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 3134ee02f54be..6714abac5c726 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -377,9 +377,12 @@ class MCAsmInfo {
/// names in .cfi_* directives. Defaults to false.
bool DwarfRegNumForCFI = false;
- /// True if target uses parens to indicate the symbol variant instead of @.
- /// For example, foo(plt) instead of foo@plt. Defaults to false.
- bool UseParensForSymbolVariant = false;
+ /// True if target uses @ (expr@specifier) for relocation specifiers.
+ bool UseAtForSpecifier = true;
+
+ /// (ARM-specific) Uses parens for relocation specifier in data
+ /// directives, e.g. .word foo(got).
+ bool UseParensForSpecifier = false;
/// True if the target uses parens for symbol names starting with
/// '$' character to distinguish them from absolute names.
@@ -649,7 +652,8 @@ class MCAsmInfo {
bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; }
bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; }
- bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; }
+ bool useAtForSpecifier() const { return UseAtForSpecifier; }
+ bool useParensForSpecifier() const { return UseParensForSpecifier; }
bool supportsExtendedDwarfLocDirective() const {
return SupportsExtendedDwarfLocDirective;
}
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 9affb1f980bb0..61b89b9a103f4 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -15,6 +15,7 @@
#include <cassert>
#include <cstddef>
#include <string>
+#include <utility>
namespace llvm {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index fa5c3dab1f115..773df74291064 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -93,7 +93,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI,
if (Kind != MCSymbolRefExpr::VK_None) {
if (!MAI) // should only be used by dump()
OS << "@<variant " << Kind << '>';
- else if (MAI->useParensForSymbolVariant()) // ARM
+ else if (MAI->useParensForSpecifier()) // ARM
OS << '(' << MAI->getSpecifierName(Kind) << ')';
else
OS << '@' << MAI->getSpecifierName(Kind);
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 23836438027c0..8715f94d51fe5 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -32,7 +32,10 @@
using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
- AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@");
+ // For COFF targets, this is true, while for ELF targets, it should be false.
+ // Currently, @specifier parsing depends on '@' being included in the token.
+ AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@") &&
+ MAI.useAtForSpecifier();
LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
}
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 65a38009a8488..17417f292e053 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1191,9 +1191,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
return false;
}
}
- // Parse symbol variant
+ // Parse an optional relocation specifier.
std::pair<StringRef, StringRef> Split;
- if (!MAI.useParensForSymbolVariant()) {
+ if (MAI.useAtForSpecifier()) {
if (FirstTokenKind == AsmToken::String) {
if (Lexer.is(AsmToken::At)) {
Lex(); // eat @
@@ -1207,8 +1207,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
} else {
Split = Identifier.split('@');
}
- } else if (Lexer.is(AsmToken::LParen)) {
- Lex(); // eat '('.
+ } else if (MAI.useParensForSpecifier() &&
+ parseOptionalToken(AsmToken::LParen)) {
StringRef VName;
parseIdentifier(VName);
if (parseRParen())
@@ -1231,7 +1231,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
if (MaybeVariant) {
SymbolName = Split.first;
Variant = MCSymbolRefExpr::VariantKind(*MaybeVariant);
- } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
+ } else if (MAI.doesAllowAtInName()) {
Variant = MCSymbolRefExpr::VK_None;
} else {
return Error(SMLoc::getFromPointer(Split.second.begin()),
@@ -1463,7 +1463,8 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
// As a special case, we support 'a op b @ modifier' by rewriting the
// expression to include the modifier. This is inefficient, but in general we
// expect users to use 'a@modifier op b'.
- if (parseOptionalToken(AsmToken::At)) {
+ if (Ctx.getAsmInfo()->useAtForSpecifier() &&
+ parseOptionalToken(AsmToken::At)) {
if (Lexer.isNot(AsmToken::Identifier))
return TokError("unexpected symbol modifier following '@'");
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index c94ddfa087fd3..70550d269002b 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
@@ -741,6 +742,13 @@ bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) {
// Handle the identifier as the key symbol.
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ bool AllowAt = getLexer().getAllowAtInIdentifier();
+ if (!AllowAt &&
+ !getContext().getAsmInfo()->getCommentString().starts_with("@"))
+ getLexer().setAllowAtInIdentifier(true);
+ auto _ =
+ make_scope_exit([&]() { getLexer().setAllowAtInIdentifier(AllowAt); });
+
// NOTE the comma is optional in all cases. It is only documented as being
// optional in the first case, however, GAS will silently treat the comma as
// optional in all cases. Furthermore, although the documentation states that
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index f38b73a784632..789f7ec09d759 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -101,7 +101,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
}
// foo(plt) instead of foo@plt
- UseParensForSymbolVariant = true;
+ UseAtForSpecifier = false;
+ UseParensForSpecifier = true;
initializeVariantKinds(variantKindDescs);
}
@@ -148,7 +149,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::WinEH;
WinEHEncodingType = WinEH::EncodingType::Itanium;
- UseParensForSymbolVariant = true;
+ UseAtForSpecifier = false;
+ UseParensForSpecifier = true;
DwarfRegNumForCFI = false;
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index c1670326143e3..d65eaac3716a1 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2079,19 +2079,23 @@ ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
if (getLexer().getKind() != AsmToken::Identifier)
return ParseStatus::NoMatch;
+ std::string Identifier(getTok().getIdentifier());
- // Avoid parsing the register in `call rd, foo` as a call symbol.
- if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
+ if (getLexer().peekTok().is(AsmToken::At)) {
+ Lex();
+ Lex();
+ StringRef PLT;
+ if (getParser().parseIdentifier(PLT) || PLT != "plt")
+ return ParseStatus::Failure;
+ } else if (!getLexer().peekTok().is(AsmToken::EndOfStatement)) {
+ // Avoid parsing the register in `call rd, foo` as a call symbol.
return ParseStatus::NoMatch;
-
- StringRef Identifier;
- if (getParser().parseIdentifier(Identifier))
- return ParseStatus::Failure;
+ } else {
+ Lex();
+ }
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
-
RISCVMCExpr::Specifier Kind = RISCVMCExpr::VK_CALL_PLT;
- (void)Identifier.consume_back("@plt");
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
Res = MCSymbolRefExpr::create(Sym, getContext());
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 7e9b312d3c25e..d1e8ec9d6b54a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -26,6 +26,7 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
AlignmentIsInBytes = false;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
+ UseAtForSpecifier = false;
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
}
diff --git a/llvm/test/MC/RISCV/pseudo-jump-invalid.s b/llvm/test/MC/RISCV/pseudo-jump-invalid.s
index 834b5a186b007..18640b6617ea3 100644
--- a/llvm/test/MC/RISCV/pseudo-jump-invalid.s
+++ b/llvm/test/MC/RISCV/pseudo-jump-invalid.s
@@ -1,5 +1,6 @@
# RUN: not llvm-mc -triple riscv32 < %s 2>&1 | FileCheck %s
jump 1234, x31 # CHECK: :[[@LINE]]:6: error: operand must be a valid jump target
-jump foo@plt, x31 # CHECK: :[[@LINE]]:10: error: invalid variant 'plt'
+jump foo@plt, x31 # CHECK: :[[@LINE]]:9: error: unexpected token
jump %pcrel_lo(1234), x31 # CHECK: :[[@LINE]]:6: error: unknown token in expression
+jump foo@xxx # CHECK: :[[@LINE]]:9: error: unexpected token
diff --git a/llvm/test/MC/RISCV/rv32i-aliases-invalid.s b/llvm/test/MC/RISCV/rv32i-aliases-invalid.s
index 7f54fe720ea48..63bc1fa09a4a2 100644
--- a/llvm/test/MC/RISCV/rv32i-aliases-invalid.s
+++ b/llvm/test/MC/RISCV/rv32i-aliases-invalid.s
@@ -32,7 +32,7 @@ lla x1, %hi(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a bare sy
lla x1, %lo(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
lla x1, %hi(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
lla x1, %lo(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
-lla a2, foo@plt # CHECK: :[[@LINE]]:17: error: '@plt' operand not valid for instruction
+lla a2, foo@plt # CHECK: :[[@LINE]]:12: error: unexpected token
negw x1, x2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
sext.w x3, x4 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
diff --git a/llvm/test/MC/RISCV/rv64i-aliases-invalid.s b/llvm/test/MC/RISCV/rv64i-aliases-invalid.s
index 1bd4e78007c83..cc35346cb8801 100644
--- a/llvm/test/MC/RISCV/rv64i-aliases-invalid.s
+++ b/llvm/test/MC/RISCV/rv64i-aliases-invalid.s
@@ -26,7 +26,7 @@ lla x1, %lo(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a constan
lla x1, %hi(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
lla x1, %lo(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
lla a1, foo+foo # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
-lla a2, foo@plt # CHECK: :[[@LINE]]:17: error: '@plt' operand not valid for instruction
+lla a2, foo@plt # CHECK: :[[@LINE]]:12: error: unexpected token
rdinstreth x29 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV32I Base Instruction Set{{$}}
rdcycleh x27 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV32I Base Instruction Set{{$}}
More information about the llvm-commits
mailing list