[llvm] [RISCV][MC] Implement evaluateBranch for auipc+jalr pairs (PR #65480)

Job Noorman via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 19 23:34:58 PDT 2023


https://github.com/mtvec updated https://github.com/llvm/llvm-project/pull/65480

>From f8b8f8a1f84c9a873d0832d03fe3a977bc8bfca8 Mon Sep 17 00:00:00 2001
From: Job Noorman <jnoorman at igalia.com>
Date: Wed, 6 Sep 2023 11:29:28 +0200
Subject: [PATCH] [RISCV][MC] Implement evaluateBranch for auipc+jalr pairs

This patch implements `MCInstrAnalysis` state in order to be able to
analyze auipc+jalr pairs inside `evaluateBranch`.

This is implemented as follows:
- State: array of currently known GPR values;
- Whenever an auipc is detected in `updateState`, update the state value
  of RD with the auipc's address plus its immediate shifted left by 12;
- Whenever a jalr is detected in `evaluateBranch`, check if the state
  holds a value for RS1 and use that to compute its target.

Note that this is similar to how binutils implements it, and the output
of llvm-objdump should now mostly match that of GNU objdump.

This patch also updates the relevant llvm-objdump tests and adds a new
one testing the output for interleaved auipc+jalr pairs.
---
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  | 79 +++++++++++++++++++
 .../tools/llvm-objdump/ELF/RISCV/branches.s   |  4 +-
 .../ELF/RISCV/multi-instr-target.s            | 45 +++++++++++
 llvm/tools/llvm-objdump/llvm-objdump.cpp      |  4 +
 4 files changed, 130 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objdump/ELF/RISCV/multi-instr-target.s

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 75af5c2de09469b..79e56a7a6d03d77 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -31,6 +31,7 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <bitset>
 
 #define GET_INSTRINFO_MC_DESC
 #define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -114,10 +115,79 @@ static MCTargetStreamer *createRISCVNullTargetStreamer(MCStreamer &S) {
 namespace {
 
 class RISCVMCInstrAnalysis : public MCInstrAnalysis {
+  int64_t GPRState[31] = {};
+  std::bitset<31> GPRValidMask;
+
+  static bool isGPR(unsigned Reg) {
+    return Reg >= RISCV::X0 && Reg <= RISCV::X31;
+  }
+
+  static unsigned getRegIndex(unsigned Reg) {
+    assert(isGPR(Reg) && Reg != RISCV::X0 && "Invalid GPR reg");
+    return Reg - RISCV::X1;
+  }
+
+  void setGPRState(unsigned Reg, std::optional<int64_t> Value) {
+    if (Reg == RISCV::X0)
+      return;
+
+    auto Index = getRegIndex(Reg);
+
+    if (Value) {
+      GPRState[Index] = *Value;
+      GPRValidMask.set(Index);
+    } else {
+      GPRValidMask.reset(Index);
+    }
+  }
+
+  std::optional<int64_t> getGPRState(unsigned Reg) const {
+    if (Reg == RISCV::X0)
+      return 0;
+
+    auto Index = getRegIndex(Reg);
+
+    if (GPRValidMask.test(Index))
+      return GPRState[Index];
+    return std::nullopt;
+  }
+
 public:
   explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
       : MCInstrAnalysis(Info) {}
 
+  void resetState() override { GPRValidMask.reset(); }
+
+  void updateState(const MCInst &Inst, uint64_t Addr) override {
+    // Terminators mark the end of a basic block which means the sequentially
+    // next instruction will be the first of another basic block and the current
+    // state will typically not be valid anymore. For calls, we assume all
+    // registers may be clobbered by the callee (TODO: should we take the
+    // calling convention into account?).
+    if (isTerminator(Inst) || isCall(Inst)) {
+      resetState();
+      return;
+    }
+
+    switch (Inst.getOpcode()) {
+    default: {
+      // Clear the state of all defined registers for instructions that we don't
+      // explicitly support.
+      auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+      for (unsigned I = 0; I < NumDefs; ++I) {
+        auto DefReg = Inst.getOperand(I).getReg();
+        if (isGPR(DefReg))
+          setGPRState(DefReg, std::nullopt);
+      }
+      break;
+    }
+    case RISCV::AUIPC:
+      setGPRState(Inst.getOperand(0).getReg(),
+                  Addr + (Inst.getOperand(1).getImm() << 12));
+      break;
+    }
+  }
+
   bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                       uint64_t &Target) const override {
     if (isConditionalBranch(Inst)) {
@@ -140,6 +210,15 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
       return true;
     }
 
+    if (Inst.getOpcode() == RISCV::JALR) {
+      if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        return true;
+      }
+
+      return false;
+    }
+
     return false;
   }
 
diff --git a/llvm/test/tools/llvm-objdump/ELF/RISCV/branches.s b/llvm/test/tools/llvm-objdump/ELF/RISCV/branches.s
index 5fec4e6e25a39a3..ebd86a702b70e7c 100644
--- a/llvm/test/tools/llvm-objdump/ELF/RISCV/branches.s
+++ b/llvm/test/tools/llvm-objdump/ELF/RISCV/branches.s
@@ -57,11 +57,11 @@ c.jal bar
 c.j bar
 
 # CHECK: auipc ra, 0
-# CHECK: jalr	ra, 16(ra){{$}}
+# CHECK: jalr	ra, 16(ra) <foo+0x58>
 call .Llocal
 
 # CHECK: auipc ra, 0
-# CHECK: jalr	ra, 16(ra){{$}}
+# CHECK: jalr	ra, 16(ra) <bar>
 call bar
 
 .Llocal:
diff --git a/llvm/test/tools/llvm-objdump/ELF/RISCV/multi-instr-target.s b/llvm/test/tools/llvm-objdump/ELF/RISCV/multi-instr-target.s
new file mode 100644
index 000000000000000..91b643e961fc6df
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/RISCV/multi-instr-target.s
@@ -0,0 +1,45 @@
+# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+c < %s | \
+# RUN:     llvm-objdump -d -M no-aliases --no-show-raw-insn - | \
+# RUN:     FileCheck %s
+
+## Test multiple interleaved auipc/jalr pairs
+# CHECK: auipc t0, 0
+1: auipc t0, %pcrel_hi(bar)
+# CHECK: auipc t1, 0
+2: auipc t1, %pcrel_hi(bar)
+# CHECK: jalr ra, {{[0-9]+}}(t0) <bar>
+jalr %pcrel_lo(1b)(t0)
+## Target should not be printed because the call above clobbers register state
+# CHECK: jalr ra, {{[0-9]+}}(t1){{$}}
+jalr %pcrel_lo(2b)(t1)
+
+## Test that auipc+jalr with a write to the target register in between does not
+## print the target
+# CHECK: auipc t0, 0
+1: auipc t0, %pcrel_hi(bar)
+# CHECK: c.li t0, 0
+li t0, 0
+# CHECK: jalr ra, {{[0-9]+}}(t0){{$}}
+jalr %pcrel_lo(1b)(t0)
+
+## Test that auipc+jalr with a write to an unrelated register in between does
+## print the target
+# CHECK: auipc t0, 0
+1: auipc t0, %pcrel_hi(bar)
+# CHECK: c.li t1, 0
+li t1, 0
+# CHECK: jalr ra, {{[0-9]+}}(t0) <bar>
+jalr %pcrel_lo(1b)(t0)
+
+## Test that auipc+jalr with a terminator in between does not print the target
+# CHECK: auipc t0, 0
+1: auipc t0, %pcrel_hi(bar)
+# CHECK: c.j {{.*}} <bar>
+j bar
+# CHECK: jalr ra, {{[0-9]+}}(t0){{$}}
+jalr %pcrel_lo(1b)(t0)
+
+# CHECK-LABEL: <bar>:
+bar:
+# CHECK: c.nop
+nop
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index f02bd6a9b531a80..a112c50bf7f2715 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1322,6 +1322,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
           !(STI->getTargetTriple().isPPC() && Target == Index))
         Labels[Target] = ("L" + Twine(LabelCount++)).str();
       MIA->updateState(Inst, Index);
+    } else if (!Disassembled && MIA) {
+      MIA->resetState();
     }
     Index += Size;
   }
@@ -2194,6 +2196,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
             }
 
             DT->InstrAnalysis->updateState(Inst, SectionAddr + Index);
+          } else if (!Disassembled && DT->InstrAnalysis) {
+            DT->InstrAnalysis->resetState();
           }
         }
 



More information about the llvm-commits mailing list