[llvm] 36978fa - [MC] Add UseAtForSpecifier

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 1 00:06:10 PDT 2025


Author: Fangrui Song
Date: 2025-04-01T00:06:05-07:00
New Revision: 36978fadb8e14c944b71fa63b876012cb2c444c2

URL: https://github.com/llvm/llvm-project/commit/36978fadb8e14c944b71fa63b876012cb2c444c2
DIFF: https://github.com/llvm/llvm-project/commit/36978fadb8e14c944b71fa63b876012cb2c444c2.diff

LOG: [MC] Add UseAtForSpecifier

Some ELF targets don't use @ for relocation specifiers.
We should not report `error: invalid variant` when @ is used.

Attempt to make expr at specifier parsing less hacky.

Added: 
    

Modified: 
    llvm/include/llvm/MC/MCAsmInfo.h
    llvm/include/llvm/MC/MCParser/MCAsmLexer.h
    llvm/lib/MC/MCExpr.cpp
    llvm/lib/MC/MCParser/AsmLexer.cpp
    llvm/lib/MC/MCParser/AsmParser.cpp
    llvm/lib/MC/MCParser/ELFAsmParser.cpp
    llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
    llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
    llvm/test/MC/RISCV/pseudo-jump-invalid.s
    llvm/test/MC/RISCV/rv32i-aliases-invalid.s
    llvm/test/MC/RISCV/rv64i-aliases-invalid.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index 3134ee02f54be..6714abac5c726 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -377,9 +377,12 @@ class MCAsmInfo {
   /// names in .cfi_* directives.  Defaults to false.
   bool DwarfRegNumForCFI = false;
 
-  /// True if target uses parens to indicate the symbol variant instead of @.
-  /// For example, foo(plt) instead of foo at plt.  Defaults to false.
-  bool UseParensForSymbolVariant = false;
+  /// True if target uses @ (expr at specifier) for relocation specifiers.
+  bool UseAtForSpecifier = true;
+
+  /// (ARM-specific) Uses parens for relocation specifier in data
+  /// directives, e.g. .word foo(got).
+  bool UseParensForSpecifier = false;
 
   /// True if the target uses parens for symbol names starting with
   /// '$' character to distinguish them from absolute names.
@@ -649,7 +652,8 @@ class MCAsmInfo {
 
   bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; }
   bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; }
-  bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; }
+  bool useAtForSpecifier() const { return UseAtForSpecifier; }
+  bool useParensForSpecifier() const { return UseParensForSpecifier; }
   bool supportsExtendedDwarfLocDirective() const {
     return SupportsExtendedDwarfLocDirective;
   }

diff  --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 9affb1f980bb0..61b89b9a103f4 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -15,6 +15,7 @@
 #include <cassert>
 #include <cstddef>
 #include <string>
+#include <utility>
 
 namespace llvm {
 

diff  --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index fa5c3dab1f115..773df74291064 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -93,7 +93,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI,
     if (Kind != MCSymbolRefExpr::VK_None) {
       if (!MAI) // should only be used by dump()
         OS << "@<variant " << Kind << '>';
-      else if (MAI->useParensForSymbolVariant()) // ARM
+      else if (MAI->useParensForSpecifier()) // ARM
         OS << '(' << MAI->getSpecifierName(Kind) << ')';
       else
         OS << '@' << MAI->getSpecifierName(Kind);

diff  --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 23836438027c0..8715f94d51fe5 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -32,7 +32,10 @@
 using namespace llvm;
 
 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
-  AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@");
+  // For COFF targets, this is true, while for ELF targets, it should be false.
+  // Currently, @specifier parsing depends on '@' being included in the token.
+  AllowAtInIdentifier = !StringRef(MAI.getCommentString()).starts_with("@") &&
+                        MAI.useAtForSpecifier();
   LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
 }
 

diff  --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 65a38009a8488..17417f292e053 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1191,9 +1191,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
         return false;
       }
     }
-    // Parse symbol variant
+    // Parse an optional relocation specifier.
     std::pair<StringRef, StringRef> Split;
-    if (!MAI.useParensForSymbolVariant()) {
+    if (MAI.useAtForSpecifier()) {
       if (FirstTokenKind == AsmToken::String) {
         if (Lexer.is(AsmToken::At)) {
           Lex(); // eat @
@@ -1207,8 +1207,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
       } else {
         Split = Identifier.split('@');
       }
-    } else if (Lexer.is(AsmToken::LParen)) {
-      Lex(); // eat '('.
+    } else if (MAI.useParensForSpecifier() &&
+               parseOptionalToken(AsmToken::LParen)) {
       StringRef VName;
       parseIdentifier(VName);
       if (parseRParen())
@@ -1231,7 +1231,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
       if (MaybeVariant) {
         SymbolName = Split.first;
         Variant = MCSymbolRefExpr::VariantKind(*MaybeVariant);
-      } else if (MAI.doesAllowAtInName() && !MAI.useParensForSymbolVariant()) {
+      } else if (MAI.doesAllowAtInName()) {
         Variant = MCSymbolRefExpr::VK_None;
       } else {
         return Error(SMLoc::getFromPointer(Split.second.begin()),
@@ -1463,7 +1463,8 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
   // As a special case, we support 'a op b @ modifier' by rewriting the
   // expression to include the modifier. This is inefficient, but in general we
   // expect users to use 'a at modifier op b'.
-  if (parseOptionalToken(AsmToken::At)) {
+  if (Ctx.getAsmInfo()->useAtForSpecifier() &&
+      parseOptionalToken(AsmToken::At)) {
     if (Lexer.isNot(AsmToken::Identifier))
       return TokError("unexpected symbol modifier following '@'");
 

diff  --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index c94ddfa087fd3..70550d269002b 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -741,6 +742,13 @@ bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) {
   // Handle the identifier as the key symbol.
   MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
 
+  bool AllowAt = getLexer().getAllowAtInIdentifier();
+  if (!AllowAt &&
+      !getContext().getAsmInfo()->getCommentString().starts_with("@"))
+    getLexer().setAllowAtInIdentifier(true);
+  auto _ =
+      make_scope_exit([&]() { getLexer().setAllowAtInIdentifier(AllowAt); });
+
   // NOTE the comma is optional in all cases.  It is only documented as being
   // optional in the first case, however, GAS will silently treat the comma as
   // optional in all cases.  Furthermore, although the documentation states that

diff  --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index f38b73a784632..789f7ec09d759 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -101,7 +101,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
   }
 
   // foo(plt) instead of foo at plt
-  UseParensForSymbolVariant = true;
+  UseAtForSpecifier = false;
+  UseParensForSpecifier = true;
 
   initializeVariantKinds(variantKindDescs);
 }
@@ -148,7 +149,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
   SupportsDebugInformation = true;
   ExceptionsType = ExceptionHandling::WinEH;
   WinEHEncodingType = WinEH::EncodingType::Itanium;
-  UseParensForSymbolVariant = true;
+  UseAtForSpecifier = false;
+  UseParensForSpecifier = true;
 
   DwarfRegNumForCFI = false;
 

diff  --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index c1670326143e3..d65eaac3716a1 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2079,19 +2079,23 @@ ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
 
   if (getLexer().getKind() != AsmToken::Identifier)
     return ParseStatus::NoMatch;
+  std::string Identifier(getTok().getIdentifier());
 
-  // Avoid parsing the register in `call rd, foo` as a call symbol.
-  if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
+  if (getLexer().peekTok().is(AsmToken::At)) {
+    Lex();
+    Lex();
+    StringRef PLT;
+    if (getParser().parseIdentifier(PLT) || PLT != "plt")
+      return ParseStatus::Failure;
+  } else if (!getLexer().peekTok().is(AsmToken::EndOfStatement)) {
+    // Avoid parsing the register in `call rd, foo` as a call symbol.
     return ParseStatus::NoMatch;
-
-  StringRef Identifier;
-  if (getParser().parseIdentifier(Identifier))
-    return ParseStatus::Failure;
+  } else {
+    Lex();
+  }
 
   SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
-
   RISCVMCExpr::Specifier Kind = RISCVMCExpr::VK_CALL_PLT;
-  (void)Identifier.consume_back("@plt");
 
   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
   Res = MCSymbolRefExpr::create(Sym, getContext());

diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 7e9b312d3c25e..d1e8ec9d6b54a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -26,6 +26,7 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
   AlignmentIsInBytes = false;
   SupportsDebugInformation = true;
   ExceptionsType = ExceptionHandling::DwarfCFI;
+  UseAtForSpecifier = false;
   Data16bitsDirective = "\t.half\t";
   Data32bitsDirective = "\t.word\t";
 }

diff  --git a/llvm/test/MC/RISCV/pseudo-jump-invalid.s b/llvm/test/MC/RISCV/pseudo-jump-invalid.s
index 834b5a186b007..18640b6617ea3 100644
--- a/llvm/test/MC/RISCV/pseudo-jump-invalid.s
+++ b/llvm/test/MC/RISCV/pseudo-jump-invalid.s
@@ -1,5 +1,6 @@
 # RUN: not llvm-mc -triple riscv32 < %s 2>&1 | FileCheck %s
 
 jump 1234, x31 # CHECK: :[[@LINE]]:6: error: operand must be a valid jump target
-jump foo at plt, x31 # CHECK: :[[@LINE]]:10: error: invalid variant 'plt'
+jump foo at plt, x31 # CHECK: :[[@LINE]]:9: error: unexpected token
 jump %pcrel_lo(1234), x31 # CHECK: :[[@LINE]]:6: error: unknown token in expression
+jump foo at xxx # CHECK: :[[@LINE]]:9: error: unexpected token

diff  --git a/llvm/test/MC/RISCV/rv32i-aliases-invalid.s b/llvm/test/MC/RISCV/rv32i-aliases-invalid.s
index 7f54fe720ea48..63bc1fa09a4a2 100644
--- a/llvm/test/MC/RISCV/rv32i-aliases-invalid.s
+++ b/llvm/test/MC/RISCV/rv32i-aliases-invalid.s
@@ -32,7 +32,7 @@ lla x1, %hi(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a bare sy
 lla x1, %lo(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
 lla x1, %hi(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
 lla x1, %lo(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a bare symbol name or an immediate integer in the range [-2147483648, 4294967295]
-lla a2, foo at plt # CHECK: :[[@LINE]]:17: error: '@plt' operand not valid for instruction
+lla a2, foo at plt # CHECK: :[[@LINE]]:12: error: unexpected token
 
 negw x1, x2   # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}
 sext.w x3, x4 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}}

diff  --git a/llvm/test/MC/RISCV/rv64i-aliases-invalid.s b/llvm/test/MC/RISCV/rv64i-aliases-invalid.s
index 1bd4e78007c83..cc35346cb8801 100644
--- a/llvm/test/MC/RISCV/rv64i-aliases-invalid.s
+++ b/llvm/test/MC/RISCV/rv64i-aliases-invalid.s
@@ -26,7 +26,7 @@ lla x1, %lo(1234) # CHECK: :[[@LINE]]:9: error: operand either must be a constan
 lla x1, %hi(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
 lla x1, %lo(foo) # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
 lla a1, foo+foo # CHECK: :[[@LINE]]:9: error: operand either must be a constant 64-bit integer or a bare symbol name
-lla a2, foo at plt # CHECK: :[[@LINE]]:17: error: '@plt' operand not valid for instruction
+lla a2, foo at plt # CHECK: :[[@LINE]]:12: error: unexpected token
 
 rdinstreth x29 # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV32I Base Instruction Set{{$}}
 rdcycleh x27   # CHECK: :[[@LINE]]:1: error: instruction requires the following: RV32I Base Instruction Set{{$}}


        


More information about the llvm-commits mailing list