[llvm] [RISCV][MC] Support Assembling 48- and 64-bit Instructions (PR #110022)

Mon Oct 7 08:38:28 PDT 2024

https://github.com/lenary updated https://github.com/llvm/llvm-project/pull/110022

>From 3d554c629373757b0eae2484dd38fe2074d89d1f Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Wed, 25 Sep 2024 09:37:53 -0700
Subject: [PATCH 1/8] [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

This adds `.insn` support for assembling instructions of 48- and
64-bits. Disassembly already knows to bunch up the instruction bits for
these instructions, but will print the disassebly as `<unknown>`.

This changes some error messages so they are a little clearer.

Co-authored-by: Sudharsan Veeravalli <quic_svs at quicinc.com>
---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 46 ++++++++++++++-----
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |  2 +
 .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 15 ++++++
 llvm/lib/Target/RISCV/RISCVInstrFormats.td    | 16 +++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       | 12 +++++
 llvm/test/MC/RISCV/insn-invalid.s             | 35 ++++++++++++--
 llvm/test/MC/RISCV/insn.s                     | 10 ++++
 7 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 5e29a92f0bacd6..f4e6cccd1b2878 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -707,6 +707,8 @@ struct RISCVOperand final : public MCParsedAsmOperand {
   bool isUImm16() const { return IsUImm<16>(); }
   bool isUImm20() const { return IsUImm<20>(); }
   bool isUImm32() const { return IsUImm<32>(); }
+  bool isUImm48() const { return IsUImm<48>(); }
+  bool isUImm64() const { return IsUImm<64>(); }
 
   bool isUImm8GE32() const {
     int64_t Imm;
@@ -3146,7 +3148,7 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
   StringRef Format;
   SMLoc ErrorLoc = Parser.getTok().getLoc();
   if (Parser.parseIdentifier(Format)) {
-    // Try parsing .insn [length], value
+    // Try parsing .insn [ length , ] value
     int64_t Length = 0;
     int64_t Value = 0;
     if (Parser.parseIntToken(
@@ -3158,23 +3160,45 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
         return true;
     }
 
-    // TODO: Add support for long instructions
-    int64_t RealLength = (Value & 3) == 3 ? 4 : 2;
-    if (!isUIntN(RealLength * 8, Value))
-      return Error(ErrorLoc, "invalid operand for instruction");
-    if (RealLength == 2 && !AllowC)
+    // TODO: Support Instructions > 64 bits.
+    if (Length > 8)
+      return Error(ErrorLoc,
+                   "instruction lengths over 64 bits are not supported");
+
+    int64_t EncodingDerivedLength = 4;
+    unsigned Opcode = RISCV::Insn32;
+    if ((Value & 0b11) != 0b11) {
+      EncodingDerivedLength = 2;
+      Opcode = RISCV::Insn16;
+    } else
+      switch (Value & 0b111'1111) {
+      case 0b001'1111:
+      case 0b101'1111:
+        EncodingDerivedLength = 6;
+        Opcode = RISCV::Insn48;
+        break;
+      case 0b011'1111:
+        EncodingDerivedLength = 8;
+        Opcode = RISCV::Insn64;
+        break;
+      case 0b111'1111:
+        // TODO: Support Instructions > 64 bits.
+        return Error(ErrorLoc,
+                     "instruction lengths over 64 bits are not supported");
+      }
+    if (Length != 0 && Length != EncodingDerivedLength)
+      return Error(ErrorLoc, "instruction length does not match the encoding");
+    if (!isUIntN(EncodingDerivedLength * 8, Value))
+      return Error(ErrorLoc, "encoding value does not fit into instruction");
+    if (!AllowC && (EncodingDerivedLength == 2))
       return Error(ErrorLoc, "compressed instructions are not allowed");
-    if (Length != 0 && Length != RealLength)
-      return Error(ErrorLoc, "instruction length mismatch");
 
     if (getParser().parseEOL("invalid operand for instruction")) {
       getParser().eatToEndOfStatement();
       return true;
     }
 
-    emitToStreamer(getStreamer(), MCInstBuilder(RealLength == 2 ? RISCV::Insn16
-                                                                : RISCV::Insn32)
-                                      .addImm(Value));
+    emitToStreamer(getStreamer(), MCInstBuilder(Opcode).addImm(Value));
     return false;
   }
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index cf3ea3e4ea2131..d82f78498418da 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -309,6 +309,8 @@ enum OperandType : unsigned {
   OPERAND_UIMM12,
   OPERAND_UIMM16,
   OPERAND_UIMM32,
+  OPERAND_UIMM48,
+  OPERAND_UIMM64,
   OPERAND_ZERO,
   OPERAND_SIMM5,
   OPERAND_SIMM5_PLUS1,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index eb21498d15e86c..66970ed37f2724 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -355,6 +355,21 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
     support::endian::write(CB, Bits, llvm::endianness::little);
     break;
   }
+  case 6: {
+    uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI) & 0xffff'ffff'ffffu;
+    SmallVector<char, 8> Encoding;
+    support::endian::write(Encoding, Bits, llvm::endianness::little);
+    assert(Encoding[6] == 0 && Encoding[7] == 0 &&
+           "Unexpected encoding for 48-bit instruction");
+    Encoding.truncate(6);
+    CB.append(Encoding);
+    break;
+  }
+  case 8: {
+    uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+    support::endian::write(CB, Bits, llvm::endianness::little);
+    break;
+  }
   }
 
   ++MCNumEmitted; // Keep track of the # of mi's emitted.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index fcea18f81b3901..013c26c72bfd55 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -266,6 +266,22 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
   let Size = 4;
 }
 
+class RVInst48<dag outs, dag ins, string opcodestr, string argstr,
+               list<dag> pattern, InstFormat format>
+    : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+  let Size = 6;
+}
+
+class RVInst64<dag outs, dag ins, string opcodestr, string argstr,
+               list<dag> pattern, InstFormat format>
+    : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
+  field bits<64> Inst;
+  field bits<64> SoftFail = 0;
+  let Size = 8;
+}
+
 // Pseudo instructions
 class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = "">
     : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index fe5623e2920e22..34439880c3a31a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -235,6 +235,8 @@ def uimm7 : RISCVUImmOp<7>;
 def uimm8 : RISCVUImmOp<8>;
 def uimm16 : RISCVUImmOp<16>;
 def uimm32 : RISCVUImmOp<32>;
+def uimm48 : RISCVUImmOp<48>;
+def uimm64 : RISCVUImmOp<64>;
 def simm12 : RISCVSImmLeafOp<12> {
   let MCOperandPredicate = [{
     int64_t Imm;
@@ -1135,6 +1137,16 @@ def Insn32 : RVInst<(outs), (ins uimm32:$value), "", "", [], InstFormatOther> {
   let Inst{31-0} = value;
   let AsmString = ".insn 0x4, $value";
 }
+def Insn48 : RVInst48<(outs), (ins uimm48:$value), "", "", [], InstFormatOther> {
+  bits<48> value;
+  let Inst{47-0} = value;
+  let AsmString = ".insn 0x6, $value";
+}
+def Insn64 : RVInst64<(outs), (ins uimm64:$value), "", "", [], InstFormatOther> {
+  bits<64> value;
+  let Inst{63-0} = value;
+  let AsmString = ".insn 0x8, $value";
+}
 }
 
 // Use InstAliases to match these so that we can combine the insn and format
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index d6fabea4e17016..e32619976108ac 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -26,8 +26,35 @@
 
 .insn . # CHECK: :[[@LINE]]:7: error: expected instruction format or an integer constant
 .insn 0x2, # CHECK: :[[@LINE]]:12: error: expected an integer constant
-.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
-.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
-.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
-.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+
 .insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
+
+.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+
+.insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+
+.insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x2, 0x1f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x2, 0x3f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x4, 0x00000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x4, 0x0000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x4, 0x0000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x6, 0x000000000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x6, 0x00000000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x8, 0x0000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x8, 0x0000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x8, 0x000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x2, 0xffff0001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x4, 0xffff00000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x6, 0xffff00000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+
+.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+.insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index b95c3b87b442f2..ff09393a6b285c 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -164,3 +164,13 @@ target:
 # CHECK-ASM: encoding: [0x13,0x00,0x00,0x00]
 # CHECK-OBJ: addi zero, zero, 0x0
 .insn 0x4, 0x13
+
+# CHECK-ASM: .insn 0x6, 31
+# CHECK-ASM: encoding: [0x1f,0x00,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 6, 0x1f
+
+# CHECK-ASM: .insn 0x8, 63
+# CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 8, 0x3f

>From 90f096cadbe35a03be04b8f573eea2e6faf8c518 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Wed, 25 Sep 2024 11:34:56 -0700
Subject: [PATCH 2/8] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 llvm/test/MC/RISCV/insn.s | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index ff09393a6b285c..b9963dd70d36ff 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -170,7 +170,17 @@ target:
 # CHECK-OBJ: <unknown>
 .insn 6, 0x1f
 
+# CHECK-ASM: .insn 0x6, 65503
+# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 0xffdf
+
 # CHECK-ASM: .insn 0x8, 63
 # CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 # CHECK-OBJ: <unknown>
 .insn 8, 0x3f
+
+# CHECK-ASM: .insn 0x8, 65471
+# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 0xffbf

>From 306c1bdbc59895143d4fa0806d7166d7ed3626b6 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Thu, 26 Sep 2024 06:50:37 -0700
Subject: [PATCH 3/8] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 77 +++++++++++--------
 llvm/test/MC/RISCV/insn-invalid.s             | 25 +++---
 llvm/test/MC/RISCV/insn.s                     |  8 +-
 3 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index f4e6cccd1b2878..f42aac2816086d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -41,6 +41,7 @@
 #include "llvm/TargetParser/RISCVISAInfo.h"
 
 #include <limits>
+#include <optional>
 
 using namespace llvm;
 
@@ -3149,7 +3150,7 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
   SMLoc ErrorLoc = Parser.getTok().getLoc();
   if (Parser.parseIdentifier(Format)) {
     // Try parsing .insn [ length , ] value
-    int64_t Length = 0;
+    std::optional<int64_t> Length;
     int64_t Value = 0;
     if (Parser.parseIntToken(
             Value, "expected instruction format or an integer constant"))
@@ -3158,38 +3159,32 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
       Length = Value;
       if (Parser.parseIntToken(Value, "expected an integer constant"))
         return true;
-    }
 
-    // TODO: Support Instructions > 64 bits.
-    if (Length > 8)
-      return Error(ErrorLoc,
-                   "instruction lengths over 64 bits are not supported");
+      if (*Length == 0 || (*Length % 2) != 0)
+        return Error(ErrorLoc, "instruction lengths must be a non-zero multiple of two");
 
-    int64_t EncodingDerivedLength = 4;
-    unsigned Opcode = RISCV::Insn32;
-    if ((Value & 0b11) != 0b11) {
-      EncodingDerivedLength = 2;
-      Opcode = RISCV::Insn16;
-    } else
-      switch (Value & 0b111'1111) {
-      case 0b001'1111:
-      case 0b101'1111:
-        EncodingDerivedLength = 6;
-        Opcode = RISCV::Insn48;
-        break;
-      case 0b011'1111:
-        EncodingDerivedLength = 8;
-        Opcode = RISCV::Insn64;
-        break;
-      case 0b111'1111:
-        // TODO: Support Instructions > 64 bits.
+      // TODO: Support Instructions > 64 bits.
+      if (Length > 8)
         return Error(ErrorLoc,
-                     "instruction lengths over 64 bits are not supported");
-      }
-    if (Length != 0 && Length != EncodingDerivedLength)
-      return Error(ErrorLoc, "instruction length does not match the encoding");
-    if (!isUIntN(EncodingDerivedLength * 8, Value))
-      return Error(ErrorLoc, "encoding value does not fit into instruction");
+                    "instruction lengths over 64 bits are not supported");
+    }
+
+    // We only derive a length from the encoding for 16- and 32-bit instructions, as
+    // the encodings for longer instructions are not frozen in the spec.
+    int64_t EncodingDerivedLength = ((Value & 0b11) == 0b11) ? 4 : 2;
+
+    if (Length) {
+      // Only check the length against the encoding if the length is present and could match
+      if ((*Length <= 4) && (*Length != EncodingDerivedLength))
+        return Error(ErrorLoc, "instruction length does not match the encoding");
+
+      if (!isUIntN(*Length * 8, Value))
+        return Error(ErrorLoc, "encoding value does not fit into instruction");
+    } else {
+      if (!isUIntN(EncodingDerivedLength * 8, Value))
+        return Error(ErrorLoc, "encoding value does not fit into instruction");
+    }
+
     if (!AllowC && (EncodingDerivedLength == 2))
       return Error(ErrorLoc, "compressed instructions are not allowed");
 
@@ -3198,6 +3193,28 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
       return true;
     }
 
+
+    unsigned Opcode;
+    if (Length) {
+      switch (*Length) {
+      case 2:
+        Opcode = RISCV::Insn16;
+        break;
+      case 4:
+        Opcode = RISCV::Insn32;
+        break;
+      case 6:
+        Opcode = RISCV::Insn48;
+        break;
+      case 8:
+        Opcode = RISCV::Insn64;
+        break;
+      default:
+        llvm_unreachable("Error should have already been emitted");
+      }
+    } else
+      Opcode = (EncodingDerivedLength == 2) ? RISCV::Insn16 : RISCV::Insn32;
+
     emitToStreamer(getStreamer(), MCInstBuilder(Opcode).addImm(Value));
     return false;
   }
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index e32619976108ac..4622d616a3d577 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -29,32 +29,25 @@
 
 .insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
 
-.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
-.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
-.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
 
 .insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
-.insn 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
 
 .insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 .insn 0x2, 0x1f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 .insn 0x2, 0x3f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 
 .insn 0x4, 0x00000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x4, 0x0000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x4, 0x0000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 
-.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x6, 0x000000000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x6, 0x00000000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+.insn 0x8, 0x0000000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
 
-.insn 0x8, 0x0000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x8, 0x0000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x8, 0x000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-
-.insn 0x2, 0xffff0001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
-.insn 0x4, 0xffff00000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
-.insn 0x6, 0xffff00000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x2, 0x10001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x4, 0x100000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x6, 0x100000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x8, 0x1000000000000003f # CHECK: :[[@LINE]]:12: error: expected an integer constant
 
 .insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
 .insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index b9963dd70d36ff..e32fec25bb16b4 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -170,8 +170,8 @@ target:
 # CHECK-OBJ: <unknown>
 .insn 6, 0x1f
 
-# CHECK-ASM: .insn 0x6, 65503
-# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00,0x00,0x00]
+# CHECK-ASM: .insn 0x4, 65503
+# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00]
 # CHECK-OBJ: <unknown>
 .insn 0xffdf
 
@@ -180,7 +180,7 @@ target:
 # CHECK-OBJ: <unknown>
 .insn 8, 0x3f
 
-# CHECK-ASM: .insn 0x8, 65471
-# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00,0x00,0x00,0x00,0x00]
+# CHECK-ASM: .insn 0x4, 65471
+# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00]
 # CHECK-OBJ: <unknown>
 .insn 0xffbf

>From e1ed7ed8175298a3ffc6a501c0da35f333161170 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Thu, 26 Sep 2024 06:56:47 -0700
Subject: [PATCH 4/8] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp   | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index f42aac2816086d..fe38e04bc57b40 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3161,22 +3161,26 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
         return true;
 
       if (*Length == 0 || (*Length % 2) != 0)
-        return Error(ErrorLoc, "instruction lengths must be a non-zero multiple of two");
+        return Error(ErrorLoc,
+                     "instruction lengths must be a non-zero multiple of two");
 
       // TODO: Support Instructions > 64 bits.
       if (Length > 8)
         return Error(ErrorLoc,
-                    "instruction lengths over 64 bits are not supported");
+                     "instruction lengths over 64 bits are not supported");
     }
 
-    // We only derive a length from the encoding for 16- and 32-bit instructions, as
-    // the encodings for longer instructions are not frozen in the spec.
+    // We only derive a length from the encoding for 16- and 32-bit
+    // instructions, as the encodings for longer instructions are not frozen in
+    // the spec.
     int64_t EncodingDerivedLength = ((Value & 0b11) == 0b11) ? 4 : 2;
 
     if (Length) {
-      // Only check the length against the encoding if the length is present and could match
+      // Only check the length against the encoding if the length is present and
+      // could match
       if ((*Length <= 4) && (*Length != EncodingDerivedLength))
-        return Error(ErrorLoc, "instruction length does not match the encoding");
+        return Error(ErrorLoc,
+                     "instruction length does not match the encoding");
 
       if (!isUIntN(*Length * 8, Value))
         return Error(ErrorLoc, "encoding value does not fit into instruction");
@@ -3193,7 +3197,6 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
       return true;
     }
 
-
     unsigned Opcode;
     if (Length) {
       switch (*Length) {

>From 1e87d8d50668c7eec1e553e58426926582fd6de2 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Thu, 26 Sep 2024 23:03:03 +0100
Subject: [PATCH 5/8] fixup! Correctly deref optional

---
 llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index fe38e04bc57b40..f5519920b43f7a 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3165,7 +3165,7 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
                      "instruction lengths must be a non-zero multiple of two");
 
       // TODO: Support Instructions > 64 bits.
-      if (Length > 8)
+      if (*Length > 8)
         return Error(ErrorLoc,
                      "instruction lengths over 64 bits are not supported");
     }

>From b3d98bc69e7ea9eaec5a96ee96453ec56d13139e Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Fri, 27 Sep 2024 05:45:44 -0700
Subject: [PATCH 6/8] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 llvm/test/MC/RISCV/insn_c-invalid.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/MC/RISCV/insn_c-invalid.s b/llvm/test/MC/RISCV/insn_c-invalid.s
index 3b424b2a9fd329..9af864edc31e24 100644
--- a/llvm/test/MC/RISCV/insn_c-invalid.s
+++ b/llvm/test/MC/RISCV/insn_c-invalid.s
@@ -24,4 +24,4 @@
 ## Make fake mnemonics we use to match these in the tablegened asm match table isn't exposed.
 .insn_cr  2, 9, a0, a1 # CHECK: :[[#@LINE]]:1: error: unknown directive
 
-.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
+.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction

>From 1b64d8c917d78690682a45ba74141adfbcfa218e Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Fri, 27 Sep 2024 06:40:53 -0700
Subject: [PATCH 7/8] Add Docs for RISC-V .insn directive

---
 llvm/docs/RISCVUsage.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index d22f642865bb3a..fabedb7d12bc05 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -426,6 +426,20 @@ line.  This currently applies to the following extensions:
 
 No extensions have experimental intrinsics.
 
+Long (>32-bit) Instruction Support
+==================================
+
+RISC-V is a variable-length ISA, but the standard currently only defines 16- and 32-bit instructions. The specification describes longer instruction encodings, but these are not stable.
+
+The LLVM disassembler, `llvm-objdump`, does use the longer instruction encodings described in the specification to guess the instruction length (up to 176 bits) and will group the disassembly view of encoding bytes correspondingly.
+
+The LLVM integrated assembler for RISC-V supports two different kinds of ``.insn`` directive, for assembling instructions that LLVM does not yet support:
+
+* ``.insn type, args*`` which takes a known instruction type, and a list of fields. You are strongly recommended to use this variant of the directive if your instruction fits an existing instruction type.
+* ``.insn [ length , ] encoding`` which takes an (optional) explicit length (in bytes) and a raw encoding for the instruction. When given an explicit length, this variant can encode instructions up to 64 bits long. The encoding part of the directive must be given all bits for the instruction, none are filled in for the user. When used without the optional length, this variant of the directive will use the LSBs of the raw encoding to work out if an instruction is 16 or 32 bits long. LLVM does not infer that an instruction might be longer than 32 bits - in this case, the user must give the length explicitly.
+
+It is stringly recommended to use the ``.insn`` directive for assembling unsupported instructions instead of ``.word`` or ``.hword``, because it will produce the correct mapping symbols to mark the word as an instruction, not data.
+
 Global Pointer (GP) Relaxation and the Small Data Limit
 =======================================================
 

>From ce5f18c0b3b715930a72c060a9b9b8053c21f0ba Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Mon, 7 Oct 2024 08:38:02 -0700
Subject: [PATCH 8/8] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 llvm/docs/RISCVUsage.rst          | 4 ++--
 llvm/test/MC/RISCV/insn-invalid.s | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index fabedb7d12bc05..5736f3807f131b 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -429,7 +429,7 @@ No extensions have experimental intrinsics.
 Long (>32-bit) Instruction Support
 ==================================
 
-RISC-V is a variable-length ISA, but the standard currently only defines 16- and 32-bit instructions. The specification describes longer instruction encodings, but these are not stable.
+RISC-V is a variable-length ISA, but the standard currently only defines 16- and 32-bit instructions. The specification describes longer instruction encodings, but these are not ratified.
 
 The LLVM disassembler, `llvm-objdump`, does use the longer instruction encodings described in the specification to guess the instruction length (up to 176 bits) and will group the disassembly view of encoding bytes correspondingly.
 
@@ -438,7 +438,7 @@ The LLVM integrated assembler for RISC-V supports two different kinds of ``.insn
 * ``.insn type, args*`` which takes a known instruction type, and a list of fields. You are strongly recommended to use this variant of the directive if your instruction fits an existing instruction type.
 * ``.insn [ length , ] encoding`` which takes an (optional) explicit length (in bytes) and a raw encoding for the instruction. When given an explicit length, this variant can encode instructions up to 64 bits long. The encoding part of the directive must be given all bits for the instruction, none are filled in for the user. When used without the optional length, this variant of the directive will use the LSBs of the raw encoding to work out if an instruction is 16 or 32 bits long. LLVM does not infer that an instruction might be longer than 32 bits - in this case, the user must give the length explicitly.
 
-It is stringly recommended to use the ``.insn`` directive for assembling unsupported instructions instead of ``.word`` or ``.hword``, because it will produce the correct mapping symbols to mark the word as an instruction, not data.
+It is strongly recommended to use the ``.insn`` directive for assembling unsupported instructions instead of ``.word`` or ``.hword``, because it will produce the correct mapping symbols to mark the word as an instruction, not data.
 
 Global Pointer (GP) Relaxation and the Small Data Limit
 =======================================================
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index 4622d616a3d577..ef2f3c382972ab 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -33,6 +33,8 @@
 .insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 .insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
 
+.insn 0x0, 0x0 # CHECK: :[[@LINE]]:7: error: instruction lengths must be a non-zero multiple of two
+.insn 0x1, 0xff # CHECK: :[[@LINE]]:7: error: instruction lengths must be a non-zero multiple of two
 .insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
 
 .insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding