[llvm] [RISCV][WIP] Branch to Absolute Address (PR #133555)

Sam Elliott via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 19:47:08 PDT 2025


https://github.com/lenary updated https://github.com/llvm/llvm-project/pull/133555

>From 0f9ff50c5a54743256de21c0eed4145a7f886deb Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Fri, 28 Mar 2025 19:22:19 -0700
Subject: [PATCH 1/2] [RISCV][WIP] Branch to Absolute Address

These are some prospective hacks to make `beq r1, r2, <addr>` work
as it does in binutils - that is, `<addr>` is treated as absolute,
rather than relative to the current instruction.

None of this code is particularly nice, but I hope some of it might be a
good place to start discussing whether we want to make this change?
---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 66 ++++++++++++++++---
 .../RISCV/MCTargetDesc/RISCVInstPrinter.cpp   | 32 ++++++---
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       | 25 ++++++-
 3 files changed, 105 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 31fe02a88e146..ab0305a21d62c 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -196,6 +196,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
   ParseStatus parseCSRSystemRegister(OperandVector &Operands);
   ParseStatus parseFPImm(OperandVector &Operands);
   ParseStatus parseImmediate(OperandVector &Operands);
+  ParseStatus parseBranchOffsetImmediate(OperandVector &Operands);
   ParseStatus parseRegister(OperandVector &Operands, bool AllowParens = false);
   ParseStatus parseMemOpBaseReg(OperandVector &Operands);
   ParseStatus parseZeroOffsetMemOp(OperandVector &Operands);
@@ -556,6 +557,10 @@ struct RISCVOperand final : public MCParsedAsmOperand {
     return IsValid && VK == RISCVMCExpr::VK_None;
   }
 
+  bool isBranchOffset() const {
+    return isImm();
+  }
+
   // Predicate methods for AsmOperands defined in RISCVInstrInfo.td
 
   bool isBareSymbol() const {
@@ -2009,6 +2014,49 @@ ParseStatus RISCVAsmParser::parseImmediate(OperandVector &Operands) {
   return ParseStatus::Success;
 }
 
+ParseStatus RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands) {
+  SMLoc S = getLoc();
+  SMLoc E;
+  const MCExpr *Res;
+
+  switch (getLexer().getKind()) {
+  default:
+    return ParseStatus::NoMatch;
+  case AsmToken::LParen:
+  case AsmToken::Dot:
+  case AsmToken::Minus:
+  case AsmToken::Plus:
+  case AsmToken::Exclaim:
+  case AsmToken::Tilde:
+  case AsmToken::Integer:
+  case AsmToken::String:
+  case AsmToken::Identifier: {
+    if (getParser().parseExpression(Res, E))
+      return ParseStatus::Failure;
+
+    // If we're already a symbol-based expression, just return.
+    // This ends up covering expressions like `. + <offset>`
+    if (Res->getKind() != MCExpr::Constant)
+      break;
+
+    // HAX: Create an absolute symbol with value zero, and add the constant.
+    const MCExpr *Zero = MCConstantExpr::create(0, getContext());
+    MCSymbol *AbsSym = getContext().createTempSymbol();
+    // AbsSym->setVariableValue(Zero);
+    getStreamer().emitAssignment(AbsSym, Zero);
+
+    Res = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(AbsSym, getContext()), Res, getContext());
+    break;
+  }
+  case AsmToken::Percent: {
+    return parseOperandWithSpecifier(Operands);
+  }
+  }
+
+  Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+  return ParseStatus::Success;
+}
+
 ParseStatus RISCVAsmParser::parseOperandWithSpecifier(OperandVector &Operands) {
   SMLoc S = getLoc();
   SMLoc E;
@@ -2132,20 +2180,20 @@ ParseStatus RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
 }
 
 ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
-  // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
-  // both being acceptable forms. When parsing `jal ra, foo` this function
-  // will be called for the `ra` register operand in an attempt to match the
-  // single-operand alias. parseJALOffset must fail for this case. It would
-  // seem logical to try parse the operand using parseImmediate and return
+  // Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo` both
+  // being acceptable forms. When parsing `jal ra, foo` this function will be
+  // called for the `ra` register operand in an attempt to match the
+  // single-operand alias. parseJALOffset must fail for this case. It would seem
+  // logical to try parse the operand using parseBranchImmediate and return
   // NoMatch if the next token is a comma (meaning we must be parsing a jal in
-  // the second form rather than the first). We can't do this as there's no
-  // way of rewinding the lexer state. Instead, return NoMatch if this operand
-  // is an identifier and is followed by a comma.
+  // the second form rather than the first). We can't do this as there's no way
+  // of rewinding the lexer state. Instead, return NoMatch if this operand is an
+  // identifier and is followed by a comma.
   if (getLexer().is(AsmToken::Identifier) &&
       getLexer().peekTok().is(AsmToken::Comma))
     return ParseStatus::NoMatch;
 
-  return parseImmediate(Operands);
+  return parseBranchOffsetImmediate(Operands);
 }
 
 bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index cd2322cc5b26d..1dab205a43144 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
@@ -102,17 +103,32 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
                                           const MCSubtargetInfo &STI,
                                           raw_ostream &O) {
   const MCOperand &MO = MI->getOperand(OpNo);
+
+  if (MO.isExpr()) {
+    // Don't mind me, just need to rifle through some of these expressions to
+    // find out if it is absolute symbol reference to an opaque zero
+    if (const auto *BE = dyn_cast<MCBinaryExpr>(MO.getExpr())) {
+      if (const auto *SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS())) {
+        if (const auto *SymVal = dyn_cast<MCConstantExpr>(SRE->getSymbol().getVariableValue(/*false*/))) {
+          if (BE->getOpcode() == MCBinaryExpr::Add && SymVal->getValue() == 0) {
+            BE->getRHS()->print(O, &MAI);
+            return;
+          }
+        }
+      }
+    }
+
+    MO.getExpr()->print(O, &MAI);
+  }
+
+
   if (!MO.isImm())
     return printOperand(MI, OpNo, STI, O);
 
-  if (PrintBranchImmAsAddress) {
-    uint64_t Target = Address + MO.getImm();
-    if (!STI.hasFeature(RISCV::Feature64Bit))
-      Target &= 0xffffffff;
-    markup(O, Markup::Target) << formatHex(Target);
-  } else {
-    markup(O, Markup::Target) << formatImm(MO.getImm());
-  }
+  uint64_t Target = Address + MO.getImm();
+  if (!STI.hasFeature(RISCV::Feature64Bit))
+    Target &= 0xffffffff;
+  markup(O, Markup::Target) << formatHex(Target);
 }
 
 void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index c9386f2307175..615c3e806f12e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -275,6 +275,29 @@ def simm12 : RISCVSImmLeafOp<12> {
 def simm12_no6 : ImmLeaf<XLenVT, [{
   return isInt<12>(Imm) && !isInt<6>(Imm) && isInt<12>(-Imm);}]>;
 
+
+def BranchOffsetAsmOperand : AsmOperandClass {
+  let Name = "BranchOffset";
+  let RenderMethod = "addImmOperands";
+  let ParserMethod = "parseBranchOffsetImmediate";
+  let DiagnosticType = !strconcat("Invalid", Name);
+  let DiagnosticString = "operand must be an absolute address or an expression";
+}
+
+def branch_offset : Operand<OtherVT> {
+  let ParserMatchClass = BranchOffsetAsmOperand;
+  let PrintMethod = "printBranchOperand";
+  let EncoderMethod = "getImmOpValueAsr1";
+  let DecoderMethod = "decodeSImmOperandAndLsl1<13>";
+  let MCOperandPredicate = [{
+    int64_t Imm;
+    if (MCOp.evaluateAsConstantImm(Imm))
+      return isShiftedInt<12, 1>(Imm);
+    return MCOp.isBareSymbolRef();
+  }];
+  let OperandType = "OPERAND_PCREL";
+}
+
 // A 13-bit signed immediate where the least significant bit is zero.
 def bare_simm13_lsb0 : Operand<OtherVT> {
   let ParserMatchClass = BareSImmNLsb0AsmOperand<13>;
@@ -532,7 +555,7 @@ include "RISCVInstrFormatsV.td"
 
 class BranchCC_rri<bits<3> funct3, string opcodestr>
     : RVInstB<funct3, OPC_BRANCH, (outs),
-              (ins GPR:$rs1, GPR:$rs2, bare_simm13_lsb0:$imm12),
+              (ins GPR:$rs1, GPR:$rs2, branch_offset:$imm12),
               opcodestr, "$rs1, $rs2, $imm12">,
       Sched<[WriteJmp, ReadJmp, ReadJmp]> {
   let isBranch = 1;

>From 35b11ae2cef143b59efb5ce1b96ca4a02bc97b6b Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Fri, 28 Mar 2025 19:46:56 -0700
Subject: [PATCH 2/2] clang-format

---
 llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp     | 10 +++++-----
 .../lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index ab0305a21d62c..fad067309610e 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -557,9 +557,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
     return IsValid && VK == RISCVMCExpr::VK_None;
   }
 
-  bool isBranchOffset() const {
-    return isImm();
-  }
+  bool isBranchOffset() const { return isImm(); }
 
   // Predicate methods for AsmOperands defined in RISCVInstrInfo.td
 
@@ -2014,7 +2012,8 @@ ParseStatus RISCVAsmParser::parseImmediate(OperandVector &Operands) {
   return ParseStatus::Success;
 }
 
-ParseStatus RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands) {
+ParseStatus
+RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands) {
   SMLoc S = getLoc();
   SMLoc E;
   const MCExpr *Res;
@@ -2045,7 +2044,8 @@ ParseStatus RISCVAsmParser::parseBranchOffsetImmediate(OperandVector &Operands)
     // AbsSym->setVariableValue(Zero);
     getStreamer().emitAssignment(AbsSym, Zero);
 
-    Res = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(AbsSym, getContext()), Res, getContext());
+    Res = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(AbsSym, getContext()),
+                                  Res, getContext());
     break;
   }
   case AsmToken::Percent: {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 1dab205a43144..d96e649070502 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -109,7 +109,8 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
     // find out if it is absolute symbol reference to an opaque zero
     if (const auto *BE = dyn_cast<MCBinaryExpr>(MO.getExpr())) {
       if (const auto *SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS())) {
-        if (const auto *SymVal = dyn_cast<MCConstantExpr>(SRE->getSymbol().getVariableValue(/*false*/))) {
+        if (const auto *SymVal = dyn_cast<MCConstantExpr>(
+                SRE->getSymbol().getVariableValue(/*false*/))) {
           if (BE->getOpcode() == MCBinaryExpr::Add && SymVal->getValue() == 0) {
             BE->getRHS()->print(O, &MAI);
             return;
@@ -121,7 +122,6 @@ void RISCVInstPrinter::printBranchOperand(const MCInst *MI, uint64_t Address,
     MO.getExpr()->print(O, &MAI);
   }
 
-
   if (!MO.isImm())
     return printOperand(MI, OpNo, STI, O);
 



More information about the llvm-commits mailing list