[llvm] [RISCV] Basic Objdump Mapping Symbol Support (PR #151452)
Sam Elliott via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 30 22:06:25 PDT 2025
https://github.com/lenary created https://github.com/llvm/llvm-project/pull/151452
This implements very basic support for RISC-V mapping symbols in llvm-objdump, sharing the implementation with how Arm/AArch64/CSKY implement this feature.
This only supports the `$x` (instruction) and `$d` (data) mapping symbols for RISC-V, and not the version of `$x` which includes an architecture string suffix.
>From 3d3970661fde13322a7bcf6fad6149b7f9715dde Mon Sep 17 00:00:00 2001
From: Sam Elliott <aelliott at qti.qualcomm.com>
Date: Wed, 30 Jul 2025 21:30:33 -0700
Subject: [PATCH] [RISCV] Basic Objdump Mapping Symbol Support
This implements very basic support for RISC-V mapping symbols in
llvm-objdump, sharing the implementation with how Arm/AArch64/CSKY
implement this feature.
This only supports the `$x` (instruction) and `$d` (data) mapping
symbols for RISC-V, and not the version of `$x` which includes an
architecture suffix.
---
llvm/docs/ReleaseNotes.md | 5 ++
.../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 2 +
.../RISCV/riscv-mapping-symbols.s | 20 +++++
llvm/test/MC/RISCV/large-instructions.s | 29 ------
llvm/test/MC/RISCV/large-instructions.test | 88 +++++++++++++++++++
llvm/test/MC/RISCV/nop-slide.s | 13 ++-
llvm/test/MC/RISCV/rvv/vsetvl-invalid.s | 24 ++---
llvm/tools/llvm-objdump/llvm-objdump.cpp | 7 +-
8 files changed, 138 insertions(+), 50 deletions(-)
create mode 100644 llvm/test/MC/Disassembler/RISCV/riscv-mapping-symbols.s
delete mode 100644 llvm/test/MC/RISCV/large-instructions.s
create mode 100644 llvm/test/MC/RISCV/large-instructions.test
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 021f321bd9dc2..4c84411bd9ed8 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -104,6 +104,11 @@ Changes to the PowerPC Backend
Changes to the RISC-V Backend
-----------------------------
+* `llvm-objdump` now has basic support for switching between disassembling code
+ and data using mapping symbols such as `$x` and `$d`. Switching architectures
+ using `$x` with an architecture string suffix is not yet supported.
+
+
Changes to the WebAssembly Backend
----------------------------------
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 82e3b5ceb4ef6..782c5670c983e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -482,6 +482,8 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
Count -= 1;
}
+ // TODO: emit a mapping symbol right here
+
if (Count % 4 == 2) {
// The canonical nop with Zca is c.nop.
OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2);
diff --git a/llvm/test/MC/Disassembler/RISCV/riscv-mapping-symbols.s b/llvm/test/MC/Disassembler/RISCV/riscv-mapping-symbols.s
new file mode 100644
index 0000000000000..ff15008b8e3f8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/RISCV/riscv-mapping-symbols.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=riscv32-unknown-none-elf %s -filetype=obj -o - \
+# RUN: | llvm-objdump -dr - \
+# RUN: | FileCheck %s
+# RUN: llvm-mc --triple=riscv64-unknown-none-elf %s -filetype=obj -o - \
+# RUN: | llvm-objdump -dr - \
+# RUN: | FileCheck %s
+
+
+ # CHECK: 00000013 nop
+ nop
+
+ # CHECK-NEXT: 55 55 55 55 .word 0x55555555
+ .word 0x55555555
+
+ # CHECK-NEXT: 00 00 00 00 .word 0x00000000
+ # CHECK-NEXT: R_RISCV_32 foo
+ .word foo
+
+ # CHECK-NEXT: 00000013 nop
+ nop
diff --git a/llvm/test/MC/RISCV/large-instructions.s b/llvm/test/MC/RISCV/large-instructions.s
deleted file mode 100644
index b50dbde17d380..0000000000000
--- a/llvm/test/MC/RISCV/large-instructions.s
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
-# RUN: | llvm-objdump -d - | FileCheck %s
-
-# CHECK: 011f 4523 8967 <unknown>
-.byte 0x1f, 0x01, 0x23, 0x45, 0x67, 0x89
-
-# CHECK: 4523013f cdab8967 <unknown>
-.byte 0x3f, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd
-
-# CHECK: 007f 4523 8967 cdab feef <unknown>
-.byte 0x7f, 0x00, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe
-
-# CHECK: 4523107f cdab8967 badcfeef <unknown>
-.byte 0x7f, 0x10, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba
-
-# CHECK: 207f 4523 8967 cdab feef badc 7698 <unknown>
-.byte 0x7f, 0x20, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76
-
-# CHECK: 4523307f cdab8967 badcfeef 32547698 <unknown>
-.byte 0x7f, 0x30, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32
-
-# CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 <unknown>
-.byte 0x7f, 0x40, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12
-
-# CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 <unknown>
-.byte 0x7f, 0x50, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56
-
-# CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 <unknown>
-.byte 0x7f, 0x60, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56, 0x78, 0x9a
diff --git a/llvm/test/MC/RISCV/large-instructions.test b/llvm/test/MC/RISCV/large-instructions.test
new file mode 100644
index 0000000000000..0552c94c34d64
--- /dev/null
+++ b/llvm/test/MC/RISCV/large-instructions.test
@@ -0,0 +1,88 @@
+# RUN: yaml2obj -o - %s \
+# RUN: | llvm-objdump -d - | FileCheck %s
+
+## This CHECKs objdump's handling of wide instruction encodings, and how it
+## groups the instruction bytes when disassembling.
+##
+## This is written in YAML because using `.byte` emits the wrong mapping
+## symbols.
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS32
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_RISCV
+ SectionHeaderStringTable: .strtab
+Sections:
+ - Name: .text.six_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 1F0123456789
+ # CHECK: 011f 4523 8967 <unknown>
+ - Name: .text.eight_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 3F0123456789ABCD
+ # CHECK: 4523013f cdab8967 <unknown>
+ - Name: .text.ten_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F0023456789ABCDEFFE
+ # CHECK: 007f 4523 8967 cdab feef <unknown>
+ - Name: .text.twelve_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F1023456789ABCDEFFEDCBA
+ # CHECK: 4523107f cdab8967 badcfeef <unknown>
+ - Name: .text.fourteen_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F2023456789ABCDEFFEDCBA9876
+ # CHECK: 207f 4523 8967 cdab feef badc 7698 <unknown>
+ - Name: .text.sixteen_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F3023456789ABCDEFFEDCBA98765432
+ # CHECK: 4523307f cdab8967 badcfeef 32547698 <unknown>
+ - Name: .text.eighteen_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F4023456789ABCDEFFEDCBA987654321012
+ # CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 <unknown>
+ - Name: .text.twenty_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F5023456789ABCDEFFEDCBA9876543210123456
+ # CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 <unknown>
+ - Name: .text.twentytwo_byte
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ AddressAlign: 0x1
+ Content: 7F6023456789ABCDEFFEDCBA9876543210123456789A
+ # CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 <unknown>
+ - Type: SectionHeaderTable
+ Sections:
+ - Name: .strtab
+ - Name: .text.six_byte
+ - Name: .text.eight_byte
+ - Name: .text.ten_byte
+ - Name: .text.twelve_byte
+ - Name: .text.fourteen_byte
+ - Name: .text.sixteen_byte
+ - Name: .text.eighteen_byte
+ - Name: .text.twenty_byte
+ - Name: .text.twentytwo_byte
+ - Name: .symtab
+Symbols:
+ - Name: '$x'
+ Section: .text.six_byte
+...
diff --git a/llvm/test/MC/RISCV/nop-slide.s b/llvm/test/MC/RISCV/nop-slide.s
index 4dc888b3ba777..a49ffdc0e420c 100644
--- a/llvm/test/MC/RISCV/nop-slide.s
+++ b/llvm/test/MC/RISCV/nop-slide.s
@@ -10,18 +10,15 @@
auipc a0, 0
# CHECK-RVC-NORELAX: 0000000000000000 <.text>:
-# CHECK-RVC-NORELAX-NEXT: 0: 0000 unimp
-# CHECK-RVC-NORELAX-NEXT: 2: 0001 nop
+# CHECK-RVC-NORELAX-NEXT: 0: 00 00 01 00 .word 0x00010000
# CHECK-RVC-NORELAX-NEXT: 4: 00000517 auipc a0, 0x0
# CHECK-RVC-RELAX: 0000000000000000 <.text>:
# CHECK-RVC-RELAX-NEXT: 0: 0001 nop
-# CHECK-RVC-RELAX-NEXT: 2: 0100 addi s0, sp, 0x80
-# CHECK-RVC-RELAX-NEXT: 4: 1700 addi s0, sp, 0x3a0
-# CHECK-RVC-RELAX-NEXT: 6: 0005 c.nop 0x1
-# CHECK-RVC-RELAX-NEXT: 8: 00 <unknown>
+# CHECK-RVC-RELAX-NEXT: 2: 00 01 .short 0x0100
+# CHECK-RVC-RELAX-NEXT: 4: 00 .byte 0x00
+# CHECK-RVC-RELAX-NEXT: 5: 00000517 auipc a0, 0x0
# CHECK: 0000000000000000 <.text>:
-# CHECK-NEXT: 0: 0000 <unknown>
-# CHECK-NEXT: 2: 0000 <unknown>
+# CHECK-NEXT: 0: 00 00 00 00 .word 0x00000000
# CHECK-NEXT: 4: 00000517 auipc a0, 0x0
diff --git a/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s b/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s
index b45f3f2dfc854..d97b53803b97a 100644
--- a/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s
+++ b/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s
@@ -4,37 +4,37 @@
# RUN: | llvm-objdump -d --mattr=+v - | FileCheck %s
# CHECK: vsetvli a1, a0, e64, m1, tu, mu
-.word 0x018575d7
+.insn 4, 0x018575d7
# CHECK: vsetvli a1, a0, 0x1c
-.word 0x01c575d7
+.insn 4, 0x01c575d7
# CHECK: vsetvli a1, a0, 0x24
-.word 0x024575d7
+.insn 4, 0x024575d7
# CHECK: vsetvli a1, a0, 0x29
-.word 0x029575d7
+.insn 4, 0x029575d7
# CHECK: vsetvli a1, a0, 0x110
-.word 0x110575d7
+.insn 4, 0x110575d7
# CHECK: vsetvli a1, a0, e64, mf8, tu, mu
-.word 0x01d575d7
+.insn 4, 0x01d575d7
# CHECK: vsetivli a1, 0x10, e8, m4, tu, mu
-.word 0xc02875d7
+.insn 4, 0xc02875d7
# CHECK: vsetivli a1, 0x10, 0xc
-.word 0xc0c875d7
+.insn 4, 0xc0c875d7
# CHECK: vsetivli a1, 0x10, 0x14
-.word 0xc14875d7
+.insn 4, 0xc14875d7
# CHECK: vsetivli a1, 0x10, 0x38
-.word 0xc38875d7
+.insn 4, 0xc38875d7
# CHECK: vsetivli a1, 0x10, 0x103
-.word 0xd03875d7
+.insn 4, 0xd03875d7
# CHECK: vsetivli a1, 0x10, e8, mf4, tu, mu
-.word 0xc06875d7
+.insn 4, 0xc06875d7
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 0316c4ba51da6..541b09a297621 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -633,8 +633,13 @@ static bool isCSKYElf(const ObjectFile &Obj) {
return Elf && Elf->getEMachine() == ELF::EM_CSKY;
}
+static bool isRISCVElf(const ObjectFile &Obj) {
+ const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj);
+ return Elf && Elf->getEMachine() == ELF::EM_RISCV;
+}
+
static bool hasMappingSymbols(const ObjectFile &Obj) {
- return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj);
+ return isArmElf(Obj) || isAArch64Elf(Obj) || isCSKYElf(Obj) || isRISCVElf(Obj);
}
static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
More information about the llvm-commits
mailing list