[llvm] [RISCV] Support instruction sizes up to 176-bits in disassembler. (PR #90371)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 27 18:41:40 PDT 2024


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/90371

None

>From 4a2f7b6f880003325415f5f545dcac94c28ac016 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Sat, 27 Apr 2024 18:39:09 -0700
Subject: [PATCH] [RISCV] Support instruction sizes up to 176-bits in
 disassembler.

---
 .../RISCV/Disassembler/RISCVDisassembler.cpp  | 42 ++++++++++++++++---
 llvm/test/MC/RISCV/large-instructions.s       | 29 +++++++++++++
 2 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/large-instructions.s

diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 7ca20190731ad8..c43bda8521fbf7 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -656,12 +656,44 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes,
                                                uint64_t Address,
                                                raw_ostream &CS) const {
-  // TODO: This will need modification when supporting instruction set
-  // extensions with instructions > 32-bits (up to 176 bits wide).
+  // It's a 16 bit instruction if bit 0 and 1 are not 0x3.
+  if ((Bytes[0] & 0x3) != 0x3)
+    return getInstruction16(MI, Size, Bytes, Address, CS);
 
-  // It's a 32 bit instruction if bit 0 and 1 are 1.
-  if ((Bytes[0] & 0x3) == 0x3)
+  // It's a 32 bit instruction if bit 1:0 are 0x3(checked above) and bits 4:2
+  // are not 0x3.
+  if ((Bytes[0] & 0x1f) != 0x1f)
     return getInstruction32(MI, Size, Bytes, Address, CS);
 
-  return getInstruction16(MI, Size, Bytes, Address, CS);
+  // 48-bit instructions are encoded as 0bxx011111.
+  if ((Bytes[0] & 0x3f) == 0x1f) {
+    Size = Bytes.size() >= 6 ? 6 : 0;
+    return MCDisassembler::Fail;
+  }
+
+  // 64-bit instructions are encoded as 0bx0111111.
+  if ((Bytes[0] & 0x7f) == 0x3f) {
+    Size = Bytes.size() >= 8 ? 8 : 0;
+    return MCDisassembler::Fail;
+  }
+
+  // Need to read a second byte.
+  if (Bytes.size() < 2) {
+    Size = 0;
+    return MCDisassembler::Fail;
+  }
+
+  // 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111.
+  // Where number of bits is (80 + (nnn * 16)) for nnn != 0b111.
+  unsigned nnn = (Bytes[1] >> 4) & 0x7;
+  if (nnn != 0x7) {
+    Size = 10 + (nnn * 2);
+    if (Bytes.size() < Size)
+      Size = 0;
+    return MCDisassembler::Fail;
+  }
+
+  // Remaining encodings are reserved for > 176-bit instructions.
+  Size = 0;
+  return MCDisassembler::Fail;
 }
diff --git a/llvm/test/MC/RISCV/large-instructions.s b/llvm/test/MC/RISCV/large-instructions.s
new file mode 100644
index 00000000000000..b50dbde17d3801
--- /dev/null
+++ b/llvm/test/MC/RISCV/large-instructions.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
+# RUN:     | llvm-objdump -d - | FileCheck %s
+
+# CHECK: 011f 4523 8967 <unknown>
+.byte 0x1f, 0x01, 0x23, 0x45, 0x67, 0x89
+
+# CHECK: 4523013f cdab8967 <unknown>
+.byte 0x3f, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd
+
+# CHECK: 007f 4523 8967 cdab feef <unknown>
+.byte 0x7f, 0x00, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe
+
+# CHECK: 4523107f cdab8967 badcfeef <unknown>
+.byte 0x7f, 0x10, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba
+
+# CHECK: 207f 4523 8967 cdab feef badc 7698 <unknown>
+.byte 0x7f, 0x20, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76
+
+# CHECK: 4523307f cdab8967 badcfeef 32547698 <unknown>
+.byte 0x7f, 0x30, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32
+
+# CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 <unknown>
+.byte 0x7f, 0x40, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12
+
+# CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 <unknown>
+.byte 0x7f, 0x50, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56
+
+# CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 <unknown>
+.byte 0x7f, 0x60, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56, 0x78, 0x9a



More information about the llvm-commits mailing list