[llvm] [RISCV][WIP] Branch to Absolute Address (PR #133555)
Sam Elliott via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 28 19:47:08 PDT 2025
https://github.com/lenary updated https://github.com/llvm/llvm-project/pull/133555
>From 0f9ff50c5a54743256de21c0eed4145a7f886deb Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Fri, 28 Mar 2025 19:22:19 -0700
Subject: [PATCH 1/2] [RISCV][WIP] Branch to Absolute Address
These are some prospective hacks hacks to make `beq r1, r2, <addr>` work
as it does in binutils - that is, `<addr>` is treated as absolute,
rather than relative to the current instruction.
None of this code is particularly nice, but I hope some of it might be a
good place to start discussing whether we want to make this change?
---
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 66 ++++++++++++++++---
.../RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 32 ++++++---
llvm/lib/Target/RISCV/RISCVInstrInfo.td | 25 ++++++-
3 files changed, 105 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 31fe02a88e146..ab0305a21d62c 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -196,6 +196,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus parseCSRSystemRegister(OperandVector &Operands);
ParseStatus parseFPImm(OperandVector &Operands);
ParseStatus parseImmediate(OperandVector &Operands);
+ ParseStatus parseBranchOffsetImmediate(OperandVector &Operands);
ParseStatus parseRegister(OperandVector &Operands, bool AllowParens = false);
ParseStatus parseMemOpBaseReg(OperandVector &Operands);
ParseStatus parseZeroOffsetMemOp(OperandVector &Operands);
@@ -556,6 +557,10 @@ struct RISCVOperand final : public MCParsedAsmOperand {
return IsValid && VK == RISCVMCExpr::VK_None;
}
+ bool isBranchOffset() const {
+ return isImm();
+ }
+
// Predicate methods for AsmOperands defined in RISCVInstrInfo.td
bool isBareSymbol() const {
@@ -2009,6 +2014,49 @@ ParseStatus RISCVAsmParser::parseImmediate(OperandVector &Operands) {
return ParseStatus::Success;
}
+ParseStatus RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E;
+ const MCExpr *Res;
+
+ switch (getLexer().getKind()) {
+ default:
+ return ParseStatus::NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Dot:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
+ case AsmToken::Integer:
+ case AsmToken::String:
+ case AsmToken::Identifier: {
+ if (getParser().parseExpression(Res, E))
+ return ParseStatus::Failure;
+
+ // If we're already a symbol-based expression, just return.
+ // This ends up covering expressions like `. + <offset>`
+ if (Res->getKind() != MCExpr::Constant)
+ break;
+
+ // HAX: Create an absolute symbol with value zero, and add the constant.
+ const MCExpr *Zero = MCConstantExpr::create(0, getContext());
+ MCSymbol *AbsSym = getContext().createTempSymbol();
+ // AbsSym->setVariableValue(Zero);
+ getStreamer().emitAssignment(AbsSym, Zero);
+
+ Res = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(AbsSym, getContext()), Res, getContext());
+ break;
+ }
+ case AsmToken::Percent: {
+ return parseOperandWithSpecifier(Operands);
+ }
+ }
+
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return ParseStatus::Success;
+}
+
ParseStatus RISCVAsmParser::parseOperandWithSpecifier(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
@@ -2132,20 +2180,20 @@ ParseStatus RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
}
ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
- // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
- // both being acceptable forms. When parsing `jal ra, foo` this function
- // will be called for the `ra` register operand in an attempt to match the
- // single-operand alias. parseJALOffset must fail for this case. It would
- // seem logical to try parse the operand using parseImmediate and return
+ // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo` both
+ // being acceptable forms. When parsing `jal ra, foo` this function will be
+ // called for the `ra` register operand in an attempt to match the
+ // single-operand alias. parseJALOffset must fail for this case. It would seem
+ // logical to try parse the operand using parseBranchImmediate and return
// NoMatch if the next token is a comma (meaning we must be parsing a jal in
- // the second form rather than the first). We can't do this as there's no
- // way of rewinding the lexer state. Instead, return NoMatch if this operand
- // is an identifier and is followed by a comma.
+ // the second form rather than the first). We can't do this as there's no way
+ // of rewinding the lexer state. Instead, return NoMatch if this operand is an
+ // identifier and is followed by a comma.
if (getLexer().is(AsmToken::Identifier) &&
getLexer().peekTok().is(AsmToken::Comma))
return ParseStatus::NoMatch;
- return parseImmediate(Operands);
+ return parseBranchOffsetImmediate(Operands);
}
bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index cd2322cc5b26d..1dab205a43144 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -102,17 +103,32 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ // Don't mind me, just need to rifle through some of these expressions to
+ // find out if it is absolute symbol reference to an opaque zero
+ if (const auto *BE = dyn_cast<MCBinaryExpr>(MO.getExpr())) {
+ if (const auto *SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS())) {
+ if (const auto *SymVal = dyn_cast<MCConstantExpr>(SRE->getSymbol().getVariableValue(/*false*/))) {
+ if (BE->getOpcode() == MCBinaryExpr::Add && SymVal->getValue() == 0) {
+ BE->getRHS()->print(O, &MAI);
+ return;
+ }
+ }
+ }
+ }
+
+ MO.getExpr()->print(O, &MAI);
+ }
+
+
if (!MO.isImm())
return printOperand(MI, OpNo, STI, O);
- if (PrintBranchImmAsAddress) {
- uint64_t Target = Address + MO.getImm();
- if (!STI.hasFeature(RISCV::Feature64Bit))
- Target &= 0xffffffff;
- markup(O, Markup::Target) << formatHex(Target);
- } else {
- markup(O, Markup::Target) << formatImm(MO.getImm());
- }
+ uint64_t Target = Address + MO.getImm();
+ if (!STI.hasFeature(RISCV::Feature64Bit))
+ Target &= 0xffffffff;
+ markup(O, Markup::Target) << formatHex(Target);
}
void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index c9386f2307175..615c3e806f12e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -275,6 +275,29 @@ def simm12 : RISCVSImmLeafOp<12> {
def simm12_no6 : ImmLeaf<XLenVT, [{
return isInt<12>(Imm) && !isInt<6>(Imm) && isInt<12>(-Imm);}]>;
+
+def BranchOffsetAsmOperand : AsmOperandClass {
+ let Name = "BranchOffset";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseBranchOffsetImmediate";
+ let DiagnosticType = !strconcat("Invalid", Name);
+ let DiagnosticString = "operand must be an absolute address or an expression";
+}
+
+def branch_offset : Operand<OtherVT> {
+ let ParserMatchClass = BranchOffsetAsmOperand;
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getImmOpValueAsr1";
+ let DecoderMethod = "decodeSImmOperandAndLsl1<13>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isShiftedInt<12, 1>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+ let OperandType = "OPERAND_PCREL";
+}
+
// A 13-bit signed immediate where the least significant bit is zero.
def bare_simm13_lsb0 : Operand<OtherVT> {
let ParserMatchClass = BareSImmNLsb0AsmOperand<13>;
@@ -532,7 +555,7 @@ include "RISCVInstrFormatsV.td"
class BranchCC_rri<bits<3> funct3, string opcodestr>
: RVInstB<funct3, OPC_BRANCH, (outs),
- (ins GPR:$rs1, GPR:$rs2, bare_simm13_lsb0:$imm12),
+ (ins GPR:$rs1, GPR:$rs2, branch_offset:$imm12),
opcodestr, "$rs1, $rs2, $imm12">,
Sched<[WriteJmp, ReadJmp, ReadJmp]> {
let isBranch = 1;
>From 35b11ae2cef143b59efb5ce1b96ca4a02bc97b6b Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Fri, 28 Mar 2025 19:46:56 -0700
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 10 +++++-----
.../lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index ab0305a21d62c..fad067309610e 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -557,9 +557,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
return IsValid && VK == RISCVMCExpr::VK_None;
}
- bool isBranchOffset() const {
- return isImm();
- }
+ bool isBranchOffset() const { return isImm(); }
// Predicate methods for AsmOperands defined in RISCVInstrInfo.td
@@ -2014,7 +2012,8 @@ ParseStatus RISCVAsmParser::parseImmediate(OperandVector &Operands) {
return ParseStatus::Success;
}
-ParseStatus RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands) {
+ParseStatus
+RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
@@ -2045,7 +2044,8 @@ ParseStatus RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands)
// AbsSym->setVariableValue(Zero);
getStreamer().emitAssignment(AbsSym, Zero);
- Res = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(AbsSym, getContext()), Res, getContext());
+ Res = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(AbsSym, getContext()),
+ Res, getContext());
break;
}
case AsmToken::Percent: {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 1dab205a43144..d96e649070502 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -109,7 +109,8 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
// find out if it is absolute symbol reference to an opaque zero
if (const auto *BE = dyn_cast<MCBinaryExpr>(MO.getExpr())) {
if (const auto *SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS())) {
- if (const auto *SymVal = dyn_cast<MCConstantExpr>(SRE->getSymbol().getVariableValue(/*false*/))) {
+ if (const auto *SymVal = dyn_cast<MCConstantExpr>(
+ SRE->getSymbol().getVariableValue(/*false*/))) {
if (BE->getOpcode() == MCBinaryExpr::Add && SymVal->getValue() == 0) {
BE->getRHS()->print(O, &MAI);
return;
@@ -121,7 +122,6 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
MO.getExpr()->print(O, &MAI);
}
-
if (!MO.isImm())
return printOperand(MI, OpNo, STI, O);
More information about the llvm-commits
mailing list