[llvm] [feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv (PR #109914)

Arjun Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 09:19:29 PST 2024


https://github.com/arjunUpatel updated https://github.com/llvm/llvm-project/pull/109914

>From a46dd9231d3a175f4f4a6e28cc6146cc5f4ec6eb Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Wed, 25 Sep 2024 00:59:45 -0400
Subject: [PATCH 01/12] added evaluateInstruction method. needs tests

---
 llvm/include/llvm/MC/MCInstrAnalysis.h        |  4 ++
 llvm/lib/MC/MCInstrAnalysis.cpp               |  7 ++
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  | 69 +++++++++++++++----
 llvm/tools/llvm-objdump/llvm-objdump.cpp      |  2 +-
 4 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index b571791c518da8..7a0085268727a2 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -181,6 +181,10 @@ class MCInstrAnalysis {
   evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                  uint64_t &Target) const;
 
+  virtual bool
+  evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                 uint64_t &Target) const;
+
   /// Given an instruction tries to get the address of a memory operand. Returns
   /// the address on success.
   virtual std::optional<uint64_t>
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index cea905d092e0b3..de1319698f966c 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -10,6 +10,7 @@
 
 #include "llvm/ADT/APInt.h"
 #include <cstdint>
+#include "MCInstrAnalysis.h"
 
 namespace llvm {
 class MCSubtargetInfo;
@@ -30,6 +31,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
   return false;
 }
 
+bool llvm::MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
+                                                uint64_t Addr, uint64_t Size,
+                                                uint64_t &Target) const {
+  return false;
+}
+
 std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
     const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
     uint64_t Size) const {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index a0dc9d93c84b8d..73d975bf3d6a39 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -178,21 +178,24 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
     }
 
     switch (Inst.getOpcode()) {
-    default: {
-      // Clear the state of all defined registers for instructions that we don't
-      // explicitly support.
-      auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
-      for (unsigned I = 0; I < NumDefs; ++I) {
-        auto DefReg = Inst.getOperand(I).getReg();
-        if (isGPR(DefReg))
-          setGPRState(DefReg, std::nullopt);
+      default: {
+        // Clear the state of all defined registers for instructions that we don't
+        // explicitly support.
+        auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+        for (unsigned I = 0; I < NumDefs; ++I) {
+          auto DefReg = Inst.getOperand(I).getReg();
+          if (isGPR(DefReg))
+            setGPRState(DefReg, std::nullopt);
+        }
+        break;
+      }
+      case RISCV::AUIPC:
+      case RISCV::LUI:
+      {
+        setGPRState(Inst.getOperand(0).getReg(), 
+                    Inst.getOperand(1).getImm() << 12);
+        break;
       }
-      break;
-    }
-    case RISCV::AUIPC:
-      setGPRState(Inst.getOperand(0).getReg(),
-                  Addr + (Inst.getOperand(1).getImm() << 12));
-      break;
     }
   }
 
@@ -230,6 +233,44 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
     return false;
   }
 
+  bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
+                      uint64_t &Target) const override {
+    switch(Inst.getOpcode()) {
+      default:
+        return false;
+      case RISCV::ADDI:
+      case RISCV::ADDIW: {
+        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+          Target  = *TargetRegState + Inst.getOperand(2).getImm();
+          return true;
+        }
+        break;
+      }
+      case RISCV::LB:
+      case RISCV::LH:
+      case RISCV::LW:
+      case RISCV::LBU:
+      case RISCV::LHU:
+      case RISCV::LWU:
+      case RISCV::LD:
+      case RISCV::FLW:
+      case RISCV::FLD:
+      case RISCV::SB:
+      case RISCV::SH:
+      case RISCV::SW:
+      case RISCV::FSW:
+      case RISCV::SD:
+      case RISCV::FSD: {
+        int64_t Offset = Inst.getOperand(2).getImm();
+        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg()))
+          Target = *TargetRegState + Offset;
+        else
+          Target = Offset;
+        return true;
+      }
+    }
+  }
+
   bool isTerminator(const MCInst &Inst) const override {
     if (MCInstrAnalysis::isTerminator(Inst))
       return true;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index b69d14b4e7609a..915394fc3b3724 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1513,7 +1513,7 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
     if (MIA) {
       if (Disassembled) {
         uint64_t Target;
-        bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+        bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target) || MIA->evaluateInstruction(Inst, Index, Size, Target);
         if (TargetKnown && (Target >= Start && Target < End) &&
             !Labels.count(Target)) {
           // On PowerPC and AIX, a function call is encoded as a branch to 0.

>From 30dd584dc7ec6e4c162aecd3e0ddcb906b9d6374 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Wed, 25 Sep 2024 03:26:15 -0400
Subject: [PATCH 02/12] dealing with git

---
 llvm/lib/MC/MCInstrAnalysis.cpp                          | 3 +--
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 9 ++++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index de1319698f966c..49acfbbfcc6598 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -10,7 +10,6 @@
 
 #include "llvm/ADT/APInt.h"
 #include <cstdint>
-#include "MCInstrAnalysis.h"
 
 namespace llvm {
 class MCSubtargetInfo;
@@ -31,7 +30,7 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
   return false;
 }
 
-bool llvm::MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
+bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
                                                 uint64_t Addr, uint64_t Size,
                                                 uint64_t &Target) const {
   return false;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 73d975bf3d6a39..e431fa1014d5e0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -189,9 +189,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
         }
         break;
       }
-      case RISCV::AUIPC:
-      case RISCV::LUI:
-      {
+      case RISCV::AUIPC: {
+        setGPRState(Inst.getOperand(0).getReg(), 
+                    Addr + (Inst.getOperand(1).getImm() << 12));
+        break;
+      }
+      case RISCV::LUI: {
         setGPRState(Inst.getOperand(0).getReg(), 
                     Inst.getOperand(1).getImm() << 12);
         break;

>From fce36f154b413dbfa2c4a8da7d6b9ee7211180f8 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Sun, 13 Oct 2024 22:24:56 -0400
Subject: [PATCH 03/12] sign extend relevant immediates

---
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  | 51 +++++++++++++------
 llvm/tools/llvm-objdump/llvm-objdump.cpp      | 22 +++++---
 2 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index e431fa1014d5e0..6325b5bd594dea 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -31,7 +31,9 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include <bitset>
+#include <cstdint>
 
 #define GET_INSTRINFO_MC_DESC
 #define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -178,6 +180,24 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
     }
 
     switch (Inst.getOpcode()) {
+      case RISCV::LUI: {
+        setGPRState(Inst.getOperand(0).getReg(), 
+                    SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+        break;
+      }
+      case RISCV::C_LUI: {
+        MCRegister Reg = Inst.getOperand(0).getReg();
+        if (Reg == RISCV::X2)
+          break;
+        setGPRState(Reg, SignExtend64<17>(Inst.getOperand(1).getImm() << 12));
+        break;
+
+      }
+      case RISCV::AUIPC: {
+        setGPRState(Inst.getOperand(0).getReg(), 
+                    Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+        break;
+      }
       default: {
         // Clear the state of all defined registers for instructions that we don't
         // explicitly support.
@@ -189,16 +209,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
         }
         break;
       }
-      case RISCV::AUIPC: {
-        setGPRState(Inst.getOperand(0).getReg(), 
-                    Addr + (Inst.getOperand(1).getImm() << 12));
-        break;
-      }
-      case RISCV::LUI: {
-        setGPRState(Inst.getOperand(0).getReg(), 
-                    Inst.getOperand(1).getImm() << 12);
-        break;
-      }
     }
   }
 
@@ -244,34 +254,43 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
       case RISCV::ADDI:
       case RISCV::ADDIW: {
         if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          Target  = *TargetRegState + Inst.getOperand(2).getImm();
+          // TODO: Figure out ways to find the actual value of XLEN during analysis
+          int XLEN = 32;
+          uint64_t mask = ~(0) >> XLEN;
+          Target  = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
+          Target &= mask;
+          Target = SignExtend64<32>(Target);
           return true;
         }
         break;
       }
       case RISCV::LB:
       case RISCV::LH:
+      case RISCV::LD:
       case RISCV::LW:
       case RISCV::LBU:
       case RISCV::LHU:
       case RISCV::LWU:
-      case RISCV::LD:
-      case RISCV::FLW:
-      case RISCV::FLD:
       case RISCV::SB:
       case RISCV::SH:
       case RISCV::SW:
-      case RISCV::FSW:
       case RISCV::SD:
+      case RISCV::FLH:
+      case RISCV::FLW:
+      case RISCV::FLD:
+      case RISCV::FSH:
+      case RISCV::FSW:
       case RISCV::FSD: {
-        int64_t Offset = Inst.getOperand(2).getImm();
+        int64_t Offset = SignExtend64<12>(Inst.getOperand(2).getImm());
         if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg()))
           Target = *TargetRegState + Offset;
         else
           Target = Offset;
         return true;
       }
+      // TODO: Add cases for compressed load and store instructions
     }
+    return false;
   }
 
   bool isTerminator(const MCInst &Inst) const override {
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 915394fc3b3724..f2199f8e5b486a 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1513,9 +1513,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
     if (MIA) {
       if (Disassembled) {
         uint64_t Target;
-        bool TargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target) || MIA->evaluateInstruction(Inst, Index, Size, Target);
-        if (TargetKnown && (Target >= Start && Target < End) &&
-            !Labels.count(Target)) {
+        bool BranchTargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
+        if (BranchTargetKnown && (Target >= Start && Target < End)) {
           // On PowerPC and AIX, a function call is encoded as a branch to 0.
           // On other PowerPC platforms (ELF), a function call is encoded as
           // a branch to self. Do not add a label for these cases.
@@ -2322,8 +2321,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
           if (Disassembled && DT->InstrAnalysis) {
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
-            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(
-                Inst, SectionAddr + Index, Size, Target);
+            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) || 
+              DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
 
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
@@ -2397,7 +2396,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
                   break;
               }
 
-              // Branch targets are printed just after the instructions.
+              // Branch and instruction targets are printed just after the instructions.
               // Print the labels corresponding to the target if there's any.
               bool BBAddrMapLabelAvailable = BBAddrMapLabels.count(Target);
               bool LabelAvailable = AllLabels.count(Target);
@@ -2478,7 +2477,16 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
                           << ">";
               } else if (LabelAvailable) {
                 *TargetOS << " <" << AllLabels[Target] << ">";
-              }
+              } 
+              // else {
+                // this case is needed because the first load in the test assembly
+                // did not have any symbols in the section nor was it caught by any of
+                // else if cases. this warrented a case where the address is printed realtive
+                // to the target section. Since no symbol was found, there is no need to handle
+                // relocations
+                // *TargetOS << " <" << 
+
+              // }
               // By convention, each record in the comment stream should be
               // terminated.
               if (TargetOS == &CommentStream)

>From 4c8d76967645eb85c98af16bcd9bd60e1d6daf48 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Tue, 15 Oct 2024 14:14:07 -0400
Subject: [PATCH 04/12] fix indentation

---
 llvm/include/llvm/MC/MCInstrAnalysis.h | 2 +-
 llvm/lib/MC/MCInstrAnalysis.cpp        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 7a0085268727a2..71d55d4399cd65 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -183,7 +183,7 @@ class MCInstrAnalysis {
 
   virtual bool
   evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                 uint64_t &Target) const;
+                      uint64_t &Target) const;
 
   /// Given an instruction tries to get the address of a memory operand. Returns
   /// the address on success.
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index 49acfbbfcc6598..da7127092b4082 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -31,8 +31,8 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
 }
 
 bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
-                                                uint64_t Addr, uint64_t Size,
-                                                uint64_t &Target) const {
+                                          uint64_t Addr, uint64_t Size,
+                                          uint64_t &Target) const {
   return false;
 }
 

>From 81e6acdc94ae3f846a11bb440ca7bbadf860585c Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Tue, 15 Oct 2024 14:15:47 -0400
Subject: [PATCH 05/12] fix ADDI + sign extention bugs

---
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 6325b5bd594dea..69d6c6cc8e3fcd 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -189,7 +189,7 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
         MCRegister Reg = Inst.getOperand(0).getReg();
         if (Reg == RISCV::X2)
           break;
-        setGPRState(Reg, SignExtend64<17>(Inst.getOperand(1).getImm() << 12));
+        setGPRState(Reg, SignExtend64<18>(Inst.getOperand(1).getImm() << 12));
         break;
 
       }
@@ -247,18 +247,26 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   }
 
   bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                      uint64_t &Target) const override {
+                           uint64_t &Target) const override {
     switch(Inst.getOpcode()) {
       default:
         return false;
-      case RISCV::ADDI:
-      case RISCV::ADDIW: {
+      case RISCV::ADDI: {
         if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
           // TODO: Figure out ways to find the actual value of XLEN during analysis
           int XLEN = 32;
-          uint64_t mask = ~(0) >> XLEN;
+          uint64_t Mask = ~((uint64_t)0) >> (64 - XLEN);
+          Target = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
+          Target &= Mask;
+          return true;
+        }
+        break;
+      }
+      case RISCV::ADDIW: {
+        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+          uint64_t Mask = ~((uint64_t)0) >> 32;
           Target  = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
-          Target &= mask;
+          Target &= Mask;
           Target = SignExtend64<32>(Target);
           return true;
         }

>From c31baef522144f8211fd2d3288e8e11a027390de Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Tue, 15 Oct 2024 14:51:21 -0400
Subject: [PATCH 06/12] prevent symbol reoslution in empty sections

---
 llvm/tools/llvm-objdump/llvm-objdump.cpp | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index f2199f8e5b486a..eeb9f376c143fa 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2366,6 +2366,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
                   if (It->first != TargetSecAddr)
                     break;
                   TargetSectionSymbols.push_back(&AllSymbols[It->second]);
+                  if (AllSymbols[It->second].empty())
+                    TargetSecAddr = 0;
                 }
               } else {
                 TargetSectionSymbols.push_back(&Symbols);
@@ -2478,15 +2480,6 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
               } else if (LabelAvailable) {
                 *TargetOS << " <" << AllLabels[Target] << ">";
               } 
-              // else {
-                // this case is needed because the first load in the test assembly
-                // did not have any symbols in the section nor was it caught by any of
-                // else if cases. this warrented a case where the address is printed realtive
-                // to the target section. Since no symbol was found, there is no need to handle
-                // relocations
-                // *TargetOS << " <" << 
-
-              // }
               // By convention, each record in the comment stream should be
               // terminated.
               if (TargetOS == &CommentStream)

>From de457f9c047aade5b0bc175479fb1799394ccead Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Sun, 20 Oct 2024 14:35:00 -0400
Subject: [PATCH 07/12] added support for compressed instructions

---
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 69d6c6cc8e3fcd..a4e1d68beb5349 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -272,6 +272,29 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
         }
         break;
       }
+      case RISCV::C_ADDI: {
+        int64_t Offset = Inst.getOperand(2).getImm();
+        if (Offset == 0)
+          break;
+        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+          Target = *TargetRegState + SignExtend64<6>(Offset);
+          return true;
+        }
+        break;
+      }
+      case RISCV::C_ADDIW: {
+        int64_t Offset = Inst.getOperand(2).getImm(); 
+        if (Offset == 0)
+          break;
+        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+          uint64_t Mask = ~((uint64_t)0) >> 32;
+          Target &= Mask;
+          Target = *TargetRegState + SignExtend64<6>(Offset);
+          Target = SignExtend64<32>(Target);
+          return true;
+        }
+        break;
+      }
       case RISCV::LB:
       case RISCV::LH:
       case RISCV::LD:
@@ -296,7 +319,20 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
           Target = Offset;
         return true;
       }
-      // TODO: Add cases for compressed load and store instructions
+      case RISCV::C_LD:
+      case RISCV::C_SD:
+      case RISCV::C_FLD:
+      case RISCV::C_FSD:
+      case RISCV::C_SW:
+      case RISCV::C_LW:
+      case RISCV::C_FSW:
+      case RISCV::C_FLW: {
+        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+          Target = *TargetRegState + Inst.getOperand(2).getImm();
+          return true;
+        }
+        break;
+      }
     }
     return false;
   }

>From ecfcf565cad6747a72759e715878185f3d616225 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Sun, 20 Oct 2024 17:26:16 -0400
Subject: [PATCH 08/12] call evaluateInstruction only when target=RISCV

---
 llvm/tools/llvm-objdump/llvm-objdump.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index fab4651521e676..3f332f7db17e90 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2322,9 +2322,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
           if (Disassembled && DT->InstrAnalysis) {
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
-            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) || 
-              DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
-
+            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target);
+            if (DT->SubtargetInfo->getTargetTriple().isRISCV())
+              PrintTarget = DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
                       DT->InstrAnalysis->evaluateMemoryOperandAddress(

>From 33e100edfa55376b39eceb73d0ff04a0990282c8 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Mon, 21 Oct 2024 16:03:41 -0400
Subject: [PATCH 09/12] fix bug cause test failures with build

---
 llvm/tools/llvm-objdump/llvm-objdump.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 3f332f7db17e90..871efe62f9de22 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1514,7 +1514,8 @@ collectLocalBranchTargets(ArrayRef<uint8_t> Bytes, MCInstrAnalysis *MIA,
       if (Disassembled) {
         uint64_t Target;
         bool BranchTargetKnown = MIA->evaluateBranch(Inst, Index, Size, Target);
-        if (BranchTargetKnown && (Target >= Start && Target < End)) {
+        if (BranchTargetKnown && (Target >= Start && Target < End) &&
+            !Labels.count(Target)) {
           // On PowerPC and AIX, a function call is encoded as a branch to 0.
           // On other PowerPC platforms (ELF), a function call is encoded as
           // a branch to self. Do not add a label for these cases.
@@ -2322,9 +2323,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
           if (Disassembled && DT->InstrAnalysis) {
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
-            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target);
-            if (DT->SubtargetInfo->getTargetTriple().isRISCV())
-              PrintTarget = DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
+            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) || 
+                               DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
                       DT->InstrAnalysis->evaluateMemoryOperandAddress(

>From 3fc4e31d140067a5fe1cdd7340494403d912edaa Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Tue, 19 Nov 2024 12:46:01 -0500
Subject: [PATCH 10/12] Instr eval based on reg width and attempt to pass tests

---
 llvm/include/llvm/MC/MCInstrAnalysis.h        |  4 ++-
 llvm/lib/MC/MCInstrAnalysis.cpp               |  2 +-
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  |  6 ++--
 llvm/tools/llvm-objdump/llvm-objdump.cpp      | 29 +++++++++++++------
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 71d55d4399cd65..1c32d4177effbf 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -181,9 +181,11 @@ class MCInstrAnalysis {
   evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                  uint64_t &Target) const;
 
+  /// Given an instruction that accesses a menory address, try to compute
+  /// the target address. Return true success, and the address in Target.
   virtual bool
   evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                      uint64_t &Target) const;
+                      uint64_t &Target, int ArchRegWidth) const;
 
   /// Given an instruction tries to get the address of a memory operand. Returns
   /// the address on success.
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index da7127092b4082..41a55637527da7 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -32,7 +32,7 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
 
 bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
                                           uint64_t Addr, uint64_t Size,
-                                          uint64_t &Target) const {
+                                          uint64_t &Target, int ArchRegWidth) const {
   return false;
 }
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index a4e1d68beb5349..664f95ddd8510d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -247,15 +247,13 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   }
 
   bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                           uint64_t &Target) const override {
+                           uint64_t &Target, int ArchRegWidth) const override {
     switch(Inst.getOpcode()) {
       default:
         return false;
       case RISCV::ADDI: {
         if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          // TODO: Figure out ways to find the actual value of XLEN during analysis
-          int XLEN = 32;
-          uint64_t Mask = ~((uint64_t)0) >> (64 - XLEN);
+          uint64_t Mask = ~((uint64_t)0) >> (64 - ArchRegWidth);
           Target = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
           Target &= Mask;
           return true;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 871efe62f9de22..b45eb70312abdf 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2323,8 +2323,9 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
           if (Disassembled && DT->InstrAnalysis) {
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
+            int TargetArchBitWidth = DT->SubtargetInfo->getTargetTriple().getArchPointerBitWidth();
             bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) || 
-                               DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target);
+                               DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target, TargetArchBitWidth);
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
                       DT->InstrAnalysis->evaluateMemoryOperandAddress(
@@ -2353,27 +2354,37 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
               // N.B. Except for XCOFF, we don't walk the relocations in the
               // relocatable case yet.
               std::vector<const SectionSymbolsTy *> TargetSectionSymbols;
+              bool AbsoluteFirst = false;
               if (!Obj.isRelocatableObject()) {
                 auto It = llvm::partition_point(
                     SectionAddresses,
                     [=](const std::pair<uint64_t, SectionRef> &O) {
                       return O.first <= Target;
                     });
-                uint64_t TargetSecAddr = 0;
+                uint64_t TargetSecAddr = It == SectionAddresses.end() ? It->first : 0;
+                bool FoundSymbols = false;
+                // missing case where begin == end as in this case, we are to return 0
                 while (It != SectionAddresses.begin()) {
                   --It;
-                  if (TargetSecAddr == 0)
-                    TargetSecAddr = It->first;
-                  if (It->first != TargetSecAddr)
-                    break;
+                  if (It->first != TargetSecAddr) {
+                    if (FoundSymbols)
+                      break;
+                    else {
+                      TargetSecAddr = It->first;
+                      AbsoluteFirst = true;
+                    }
+                  }
                   TargetSectionSymbols.push_back(&AllSymbols[It->second]);
-                  if (AllSymbols[It->second].empty())
-                    TargetSecAddr = 0;
+                  if (!AllSymbols[It->second].empty())
+                    FoundSymbols = true;
                 }
               } else {
                 TargetSectionSymbols.push_back(&Symbols);
               }
-              TargetSectionSymbols.push_back(&AbsoluteSymbols);
+              if (AbsoluteFirst)
+                TargetSectionSymbols.insert(TargetSectionSymbols.begin(), &AbsoluteSymbols);
+              else
+                TargetSectionSymbols.push_back(&AbsoluteSymbols);
 
               // Find the last symbol in the first candidate section whose
               // offset is less than or equal to the target. If there are no

>From e425a7c59d83e741fb8f79d82b2f2c1933a72d24 Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Thu, 21 Nov 2024 12:09:29 -0500
Subject: [PATCH 11/12] address comments, merge instruction evaluation and pass
 register width through constructor

---
 llvm/include/llvm/MC/MCInstrAnalysis.h        |   3 +-
 llvm/lib/MC/MCInstrAnalysis.cpp               |   2 +-
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  | 193 ++++++++----------
 llvm/tools/llvm-objdump/llvm-objdump.cpp      |   8 +-
 4 files changed, 86 insertions(+), 120 deletions(-)

diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 1c32d4177effbf..8e9418da395aca 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_MC_MCINSTRANALYSIS_H
 #define LLVM_MC_MCINSTRANALYSIS_H
 
+#include "TargetRegistry.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
@@ -185,7 +186,7 @@ class MCInstrAnalysis {
   /// the target address. Return true success, and the address in Target.
   virtual bool
   evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                      uint64_t &Target, int ArchRegWidth) const;
+                      uint64_t &Target, raw_ostream *TargetOS) const;
 
   /// Given an instruction tries to get the address of a memory operand. Returns
   /// the address on success.
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index 41a55637527da7..92590c6d03f945 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -32,7 +32,7 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
 
 bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
                                           uint64_t Addr, uint64_t Size,
-                                          uint64_t &Target, int ArchRegWidth) const {
+                                          uint64_t &Target, raw_ostream *TargetOS) const {
   return false;
 }
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 664f95ddd8510d..860358e3a268cb 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -127,6 +127,7 @@ namespace {
 class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   int64_t GPRState[31] = {};
   std::bitset<31> GPRValidMask;
+  int ArchRegWidth;
 
   static bool isGPR(MCRegister Reg) {
     return Reg >= RISCV::X0 && Reg <= RISCV::X31;
@@ -163,8 +164,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   }
 
 public:
-  explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info)
-      : MCInstrAnalysis(Info) {}
+  explicit RISCVMCInstrAnalysis(const MCInstrInfo *Info, int ArchRegWidth)
+      : MCInstrAnalysis(Info), ArchRegWidth(ArchRegWidth) {}
 
   void resetState() override { GPRValidMask.reset(); }
 
@@ -180,35 +181,28 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
     }
 
     switch (Inst.getOpcode()) {
-      case RISCV::LUI: {
-        setGPRState(Inst.getOperand(0).getReg(), 
-                    SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
-        break;
-      }
-      case RISCV::C_LUI: {
-        MCRegister Reg = Inst.getOperand(0).getReg();
-        if (Reg == RISCV::X2)
-          break;
-        setGPRState(Reg, SignExtend64<18>(Inst.getOperand(1).getImm() << 12));
-        break;
-
-      }
-      case RISCV::AUIPC: {
-        setGPRState(Inst.getOperand(0).getReg(), 
-                    Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
-        break;
-      }
-      default: {
-        // Clear the state of all defined registers for instructions that we don't
-        // explicitly support.
-        auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
-        for (unsigned I = 0; I < NumDefs; ++I) {
-          auto DefReg = Inst.getOperand(I).getReg();
-          if (isGPR(DefReg))
-            setGPRState(DefReg, std::nullopt);
-        }
-        break;
+    case RISCV::C_LUI:
+    case RISCV::LUI: {
+      setGPRState(Inst.getOperand(0).getReg(), 
+                  SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+      break;
+    }
+    case RISCV::AUIPC: {
+      setGPRState(Inst.getOperand(0).getReg(), 
+                  Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
+      break;
+    }
+    default: {
+      // Clear the state of all defined registers for instructions that we don't
+      // explicitly support.
+      auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+      for (unsigned I = 0; I < NumDefs; ++I) {
+        auto DefReg = Inst.getOperand(I).getReg();
+        if (isGPR(DefReg))
+          setGPRState(DefReg, std::nullopt);
       }
+      break;
+    }
     }
   }
 
@@ -247,90 +241,59 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   }
 
   bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                           uint64_t &Target, int ArchRegWidth) const override {
+                           uint64_t &Target, raw_ostream *TargetOS) const override {
     switch(Inst.getOpcode()) {
-      default:
-        return false;
-      case RISCV::ADDI: {
-        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          uint64_t Mask = ~((uint64_t)0) >> (64 - ArchRegWidth);
-          Target = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
-          Target &= Mask;
-          return true;
-        }
-        break;
-      }
-      case RISCV::ADDIW: {
-        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          uint64_t Mask = ~((uint64_t)0) >> 32;
-          Target  = *TargetRegState + SignExtend64<12>(Inst.getOperand(2).getImm());
-          Target &= Mask;
-          Target = SignExtend64<32>(Target);
-          return true;
-        }
-        break;
-      }
-      case RISCV::C_ADDI: {
-        int64_t Offset = Inst.getOperand(2).getImm();
-        if (Offset == 0)
-          break;
-        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          Target = *TargetRegState + SignExtend64<6>(Offset);
-          return true;
-        }
-        break;
-      }
-      case RISCV::C_ADDIW: {
-        int64_t Offset = Inst.getOperand(2).getImm(); 
-        if (Offset == 0)
-          break;
-        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          uint64_t Mask = ~((uint64_t)0) >> 32;
-          Target &= Mask;
-          Target = *TargetRegState + SignExtend64<6>(Offset);
-          Target = SignExtend64<32>(Target);
-          return true;
-        }
-        break;
+    default:
+      return false;
+    case RISCV::C_ADDI:
+    case RISCV::ADDI: {
+      if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        Target &= maskTrailingOnes<uint64_t>(ArchRegWidth);
+        return true;
       }
-      case RISCV::LB:
-      case RISCV::LH:
-      case RISCV::LD:
-      case RISCV::LW:
-      case RISCV::LBU:
-      case RISCV::LHU:
-      case RISCV::LWU:
-      case RISCV::SB:
-      case RISCV::SH:
-      case RISCV::SW:
-      case RISCV::SD:
-      case RISCV::FLH:
-      case RISCV::FLW:
-      case RISCV::FLD:
-      case RISCV::FSH:
-      case RISCV::FSW:
-      case RISCV::FSD: {
-        int64_t Offset = SignExtend64<12>(Inst.getOperand(2).getImm());
-        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg()))
-          Target = *TargetRegState + Offset;
-        else
-          Target = Offset;
+      break;
+    }
+    case RISCV::C_ADDIW:
+    case RISCV::ADDIW: {
+      if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        Target = SignExtend64<32>(Target);
         return true;
       }
-      case RISCV::C_LD:
-      case RISCV::C_SD:
-      case RISCV::C_FLD:
-      case RISCV::C_FSD:
-      case RISCV::C_SW:
-      case RISCV::C_LW:
-      case RISCV::C_FSW:
-      case RISCV::C_FLW: {
-        if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
-          Target = *TargetRegState + Inst.getOperand(2).getImm();
-          return true;
-        }
-        break;
+      break;
+    }
+    case RISCV::LB:
+    case RISCV::LH:
+    case RISCV::LD:
+    case RISCV::LW:
+    case RISCV::LBU:
+    case RISCV::LHU:
+    case RISCV::LWU:
+    case RISCV::SB:
+    case RISCV::SH:
+    case RISCV::SW:
+    case RISCV::SD:
+    case RISCV::FLH:
+    case RISCV::FLW:
+    case RISCV::FLD:
+    case RISCV::FSH:
+    case RISCV::FSW:
+    case RISCV::FSD:
+    case RISCV::C_LD:
+    case RISCV::C_SD:
+    case RISCV::C_FLD:
+    case RISCV::C_FSD:
+    case RISCV::C_SW:
+    case RISCV::C_LW:
+    case RISCV::C_FSW:
+    case RISCV::C_FLW: {
+      if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        return true;
       }
+      break;
+    }
     }
     return false;
   }
@@ -428,8 +391,12 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
 
 } // end anonymous namespace
 
-static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
-  return new RISCVMCInstrAnalysis(Info);
+static MCInstrAnalysis *createRISCV32InstrAnalysis(const MCInstrInfo *Info) {
+  return new RISCVMCInstrAnalysis(Info, 32);
+}
+
+static MCInstrAnalysis *createRISCV64InstrAnalysis(const MCInstrInfo *Info) {
+  return new RISCVMCInstrAnalysis(Info, 64);
 }
 
 namespace {
@@ -455,12 +422,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
     TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer);
     TargetRegistry::RegisterObjectTargetStreamer(
         *T, createRISCVObjectTargetStreamer);
-    TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);
-
     // Register the asm target streamer.
     TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer);
     // Register the null target streamer.
     TargetRegistry::RegisterNullTargetStreamer(*T,
                                                createRISCVNullTargetStreamer);
   }
+    TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(), createRISCV32InstrAnalysis);
+    TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(), createRISCV64InstrAnalysis);
 }
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index b45eb70312abdf..5a48a654772d32 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2323,9 +2323,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
           if (Disassembled && DT->InstrAnalysis) {
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
-            int TargetArchBitWidth = DT->SubtargetInfo->getTargetTriple().getArchPointerBitWidth();
             bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) || 
-                               DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target, TargetArchBitWidth);
+                               DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target, TargetOS);
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
                       DT->InstrAnalysis->evaluateMemoryOperandAddress(
@@ -2361,9 +2360,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
                     [=](const std::pair<uint64_t, SectionRef> &O) {
                       return O.first <= Target;
                     });
-                uint64_t TargetSecAddr = It == SectionAddresses.end() ? It->first : 0;
+                uint64_t TargetSecAddr = It == SectionAddresses.end() ? 0 : It->first;
                 bool FoundSymbols = false;
-                // missing case where begin == end as in this case, we are to return 0
                 while (It != SectionAddresses.begin()) {
                   --It;
                   if (It->first != TargetSecAddr) {
@@ -2491,7 +2489,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
                           << ">";
               } else if (LabelAvailable) {
                 *TargetOS << " <" << AllLabels[Target] << ">";
-              } 
+              }
               // By convention, each record in the comment stream should be
               // terminated.
               if (TargetOS == &CommentStream)

>From 571f056fa70dfcba6ab486fe14dd543cba3581fa Mon Sep 17 00:00:00 2001
From: arjunUpatel <arjunpatel151002 at gmail.com>
Date: Thu, 21 Nov 2024 12:19:00 -0500
Subject: [PATCH 12/12] remove debugging code

---
 llvm/include/llvm/MC/MCInstrAnalysis.h                   | 6 +++---
 llvm/lib/MC/MCInstrAnalysis.cpp                          | 6 +++---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 9 ++++++---
 llvm/tools/llvm-objdump/llvm-objdump.cpp                 | 7 +++++--
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 8e9418da395aca..fb893d8e2f4eb5 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -184,9 +184,9 @@ class MCInstrAnalysis {
 
   /// Given an instruction that accesses a menory address, try to compute
   /// the target address. Return true success, and the address in Target.
-  virtual bool
-  evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                      uint64_t &Target, raw_ostream *TargetOS) const;
+  virtual bool evaluateInstruction(const MCInst &Inst, uint64_t Addr,
+                                   uint64_t Size, uint64_t &Target,
+                                   raw_ostream *TargetOS) const;
 
   /// Given an instruction tries to get the address of a memory operand. Returns
   /// the address on success.
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index 92590c6d03f945..c685883df9acc7 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -30,9 +30,9 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
   return false;
 }
 
-bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst,
-                                          uint64_t Addr, uint64_t Size,
-                                          uint64_t &Target, raw_ostream *TargetOS) const {
+bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst, uint64_t Addr,
+                                          uint64_t Size, uint64_t &Target,
+                                          raw_ostream *TargetOS) const {
   return false;
 }
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 860358e3a268cb..125f8920461c1c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -241,7 +241,8 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
   }
 
   bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
-                           uint64_t &Target, raw_ostream *TargetOS) const override {
+                           uint64_t &Target,
+                           raw_ostream *TargetOS) const override {
     switch(Inst.getOpcode()) {
     default:
       return false;
@@ -428,6 +429,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
     TargetRegistry::RegisterNullTargetStreamer(*T,
                                                createRISCVNullTargetStreamer);
   }
-    TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(), createRISCV32InstrAnalysis);
-    TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(), createRISCV64InstrAnalysis);
+  TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV32Target(),
+                                          createRISCV32InstrAnalysis);
+  TargetRegistry::RegisterMCInstrAnalysis(getTheRISCV64Target(),
+                                          createRISCV64InstrAnalysis);
 }
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 5a48a654772d32..938b753c134ddc 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -2323,8 +2323,11 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj,
           if (Disassembled && DT->InstrAnalysis) {
             llvm::raw_ostream *TargetOS = &FOS;
             uint64_t Target;
-            bool PrintTarget = DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index, Size, Target) || 
-                               DT->InstrAnalysis->evaluateInstruction(Inst, SectionAddr + Index, Size, Target, TargetOS);
+            bool PrintTarget =
+                DT->InstrAnalysis->evaluateBranch(Inst, SectionAddr + Index,
+                                                  Size, Target) ||
+                DT->InstrAnalysis->evaluateInstruction(
+                    Inst, SectionAddr + Index, Size, Target, TargetOS);
             if (!PrintTarget) {
               if (std::optional<uint64_t> MaybeTarget =
                       DT->InstrAnalysis->evaluateMemoryOperandAddress(



More information about the llvm-commits mailing list