[llvm] [RISCV][MC] Support Assembling 48- and 64-bit Instructions (PR #110022)

Thu Sep 26 15:03:13 PDT 2024

https://github.com/lenary updated https://github.com/llvm/llvm-project/pull/110022

>From 3d554c629373757b0eae2484dd38fe2074d89d1f Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Wed, 25 Sep 2024 09:37:53 -0700
Subject: [PATCH 1/5] [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

This adds `.insn` support for assembling 48- and 64-bit instructions.
Disassembly already knows to bunch up the instruction bits for these
instructions, but will print the disassembly as `<unknown>`.

This changes some error messages so they are a little clearer.

Co-authored-by: Sudharsan Veeravalli <quic_svs at quicinc.com>
---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 46 ++++++++++++++-----
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |  2 +
 .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 15 ++++++
 llvm/lib/Target/RISCV/RISCVInstrFormats.td    | 16 +++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       | 12 +++++
 llvm/test/MC/RISCV/insn-invalid.s             | 35 ++++++++++++--
 llvm/test/MC/RISCV/insn.s                     | 10 ++++
 7 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 5e29a92f0bacd6..f4e6cccd1b2878 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -707,6 +707,8 @@ struct RISCVOperand final : public MCParsedAsmOperand {
   bool isUImm16() const { return IsUImm<16>(); }
   bool isUImm20() const { return IsUImm<20>(); }
   bool isUImm32() const { return IsUImm<32>(); }
+  bool isUImm48() const { return IsUImm<48>(); }
+  bool isUImm64() const { return IsUImm<64>(); }
 
   bool isUImm8GE32() const {
     int64_t Imm;
@@ -3146,7 +3148,7 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
   StringRef Format;
   SMLoc ErrorLoc = Parser.getTok().getLoc();
   if (Parser.parseIdentifier(Format)) {
-    // Try parsing .insn [length], value
+    // Try parsing .insn [ length , ] value
     int64_t Length = 0;
     int64_t Value = 0;
     if (Parser.parseIntToken(
@@ -3158,23 +3160,45 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
         return true;
     }
 
-    // TODO: Add support for long instructions
-    int64_t RealLength = (Value & 3) == 3 ? 4 : 2;
-    if (!isUIntN(RealLength * 8, Value))
-      return Error(ErrorLoc, "invalid operand for instruction");
-    if (RealLength == 2 && !AllowC)
+    // TODO: Support Instructions > 64 bits.
+    if (Length > 8)
+      return Error(ErrorLoc,
+                   "instruction lengths over 64 bits are not supported");
+
+    int64_t EncodingDerivedLength = 4;
+    unsigned Opcode = RISCV::Insn32;
+    if ((Value & 0b11) != 0b11) {
+      EncodingDerivedLength = 2;
+      Opcode = RISCV::Insn16;
+    } else
+      switch (Value & 0b111'1111) {
+      case 0b001'1111:
+      case 0b101'1111:
+        EncodingDerivedLength = 6;
+        Opcode = RISCV::Insn48;
+        break;
+      case 0b011'1111:
+        EncodingDerivedLength = 8;
+        Opcode = RISCV::Insn64;
+        break;
+      case 0b111'1111:
+        // TODO: Support Instructions > 64 bits.
+        return Error(ErrorLoc,
+                     "instruction lengths over 64 bits are not supported");
+      }
+    if (Length != 0 && Length != EncodingDerivedLength)
+      return Error(ErrorLoc, "instruction length does not match the encoding");
+    if (!isUIntN(EncodingDerivedLength * 8, Value))
+      return Error(ErrorLoc, "encoding value does not fit into instruction");
+    if (!AllowC && (EncodingDerivedLength == 2))
       return Error(ErrorLoc, "compressed instructions are not allowed");
-    if (Length != 0 && Length != RealLength)
-      return Error(ErrorLoc, "instruction length mismatch");
 
     if (getParser().parseEOL("invalid operand for instruction")) {
       getParser().eatToEndOfStatement();
       return true;
     }
 
-    emitToStreamer(getStreamer(), MCInstBuilder(RealLength == 2 ? RISCV::Insn16
-                                                                : RISCV::Insn32)
-                                      .addImm(Value));
+    emitToStreamer(getStreamer(), MCInstBuilder(Opcode).addImm(Value));
     return false;
   }
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index cf3ea3e4ea2131..d82f78498418da 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -309,6 +309,8 @@ enum OperandType : unsigned {
   OPERAND_UIMM12,
   OPERAND_UIMM16,
   OPERAND_UIMM32,
+  OPERAND_UIMM48,
+  OPERAND_UIMM64,
   OPERAND_ZERO,
   OPERAND_SIMM5,
   OPERAND_SIMM5_PLUS1,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index eb21498d15e86c..66970ed37f2724 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -355,6 +355,21 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
     support::endian::write(CB, Bits, llvm::endianness::little);
     break;
   }
+  case 6: {
+    uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI) & 0xffff'ffff'ffffu;
+    SmallVector<char, 8> Encoding;
+    support::endian::write(Encoding, Bits, llvm::endianness::little);
+    assert(Encoding[6] == 0 && Encoding[7] == 0 &&
+           "Unexpected encoding for 48-bit instruction");
+    Encoding.truncate(6);
+    CB.append(Encoding);
+    break;
+  }
+  case 8: {
+    uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+    support::endian::write(CB, Bits, llvm::endianness::little);
+    break;
+  }
   }
 
   ++MCNumEmitted; // Keep track of the # of mi's emitted.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index fcea18f81b3901..013c26c72bfd55 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -266,6 +266,22 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
   let Size = 4;
 }
 
+class RVInst48<dag outs, dag ins, string opcodestr, string argstr,
+               list<dag> pattern, InstFormat format>
+    : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+  let Size = 6;
+}
+
+class RVInst64<dag outs, dag ins, string opcodestr, string argstr,
+               list<dag> pattern, InstFormat format>
+    : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
+  field bits<64> Inst;
+  field bits<64> SoftFail = 0;
+  let Size = 8;
+}
+
 // Pseudo instructions
 class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = "">
     : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index fe5623e2920e22..34439880c3a31a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -235,6 +235,8 @@ def uimm7 : RISCVUImmOp<7>;
 def uimm8 : RISCVUImmOp<8>;
 def uimm16 : RISCVUImmOp<16>;
 def uimm32 : RISCVUImmOp<32>;
+def uimm48 : RISCVUImmOp<48>;
+def uimm64 : RISCVUImmOp<64>;
 def simm12 : RISCVSImmLeafOp<12> {
   let MCOperandPredicate = [{
     int64_t Imm;
@@ -1135,6 +1137,16 @@ def Insn32 : RVInst<(outs), (ins uimm32:$value), "", "", [], InstFormatOther> {
   let Inst{31-0} = value;
   let AsmString = ".insn 0x4, $value";
 }
+def Insn48 : RVInst48<(outs), (ins uimm48:$value), "", "", [], InstFormatOther> {
+  bits<48> value;
+  let Inst{47-0} = value;
+  let AsmString = ".insn 0x6, $value";
+}
+def Insn64 : RVInst64<(outs), (ins uimm64:$value), "", "", [], InstFormatOther> {
+  bits<64> value;
+  let Inst{63-0} = value;
+  let AsmString = ".insn 0x8, $value";
+}
 }
 
 // Use InstAliases to match these so that we can combine the insn and format
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index d6fabea4e17016..e32619976108ac 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -26,8 +26,35 @@
 
 .insn . # CHECK: :[[@LINE]]:7: error: expected instruction format or an integer constant
 .insn 0x2, # CHECK: :[[@LINE]]:12: error: expected an integer constant
-.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
-.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
-.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
-.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+
 .insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
+
+.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+
+.insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+
+.insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x2, 0x1f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x2, 0x3f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x4, 0x00000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x4, 0x0000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x4, 0x0000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x6, 0x000000000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x6, 0x00000000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x8, 0x0000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x8, 0x0000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x8, 0x000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+
+.insn 0x2, 0xffff0001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x4, 0xffff00000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x6, 0xffff00000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+
+.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+.insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index b95c3b87b442f2..ff09393a6b285c 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -164,3 +164,13 @@ target:
 # CHECK-ASM: encoding: [0x13,0x00,0x00,0x00]
 # CHECK-OBJ: addi zero, zero, 0x0
 .insn 0x4, 0x13
+
+# CHECK-ASM: .insn 0x6, 31
+# CHECK-ASM: encoding: [0x1f,0x00,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 6, 0x1f
+
+# CHECK-ASM: .insn 0x8, 63
+# CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 8, 0x3f

>From 90f096cadbe35a03be04b8f573eea2e6faf8c518 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Wed, 25 Sep 2024 11:34:56 -0700
Subject: [PATCH 2/5] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 llvm/test/MC/RISCV/insn.s | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index ff09393a6b285c..b9963dd70d36ff 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -170,7 +170,17 @@ target:
 # CHECK-OBJ: <unknown>
 .insn 6, 0x1f
 
+# CHECK-ASM: .insn 0x6, 65503
+# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 0xffdf
+
 # CHECK-ASM: .insn 0x8, 63
 # CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 # CHECK-OBJ: <unknown>
 .insn 8, 0x3f
+
+# CHECK-ASM: .insn 0x8, 65471
+# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00,0x00,0x00,0x00,0x00]
+# CHECK-OBJ: <unknown>
+.insn 0xffbf

>From 306c1bdbc59895143d4fa0806d7166d7ed3626b6 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Thu, 26 Sep 2024 06:50:37 -0700
Subject: [PATCH 3/5] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 77 +++++++++++--------
 llvm/test/MC/RISCV/insn-invalid.s             | 25 +++---
 llvm/test/MC/RISCV/insn.s                     |  8 +-
 3 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index f4e6cccd1b2878..f42aac2816086d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -41,6 +41,7 @@
 #include "llvm/TargetParser/RISCVISAInfo.h"
 
 #include <limits>
+#include <optional>
 
 using namespace llvm;
 
@@ -3149,7 +3150,7 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
   SMLoc ErrorLoc = Parser.getTok().getLoc();
   if (Parser.parseIdentifier(Format)) {
     // Try parsing .insn [ length , ] value
-    int64_t Length = 0;
+    std::optional<int64_t> Length;
     int64_t Value = 0;
     if (Parser.parseIntToken(
             Value, "expected instruction format or an integer constant"))
@@ -3158,38 +3159,32 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
       Length = Value;
       if (Parser.parseIntToken(Value, "expected an integer constant"))
         return true;
-    }
 
-    // TODO: Support Instructions > 64 bits.
-    if (Length > 8)
-      return Error(ErrorLoc,
-                   "instruction lengths over 64 bits are not supported");
+      if (*Length == 0 || (*Length % 2) != 0)
+        return Error(ErrorLoc, "instruction lengths must be a non-zero multiple of two");
 
-    int64_t EncodingDerivedLength = 4;
-    unsigned Opcode = RISCV::Insn32;
-    if ((Value & 0b11) != 0b11) {
-      EncodingDerivedLength = 2;
-      Opcode = RISCV::Insn16;
-    } else
-      switch (Value & 0b111'1111) {
-      case 0b001'1111:
-      case 0b101'1111:
-        EncodingDerivedLength = 6;
-        Opcode = RISCV::Insn48;
-        break;
-      case 0b011'1111:
-        EncodingDerivedLength = 8;
-        Opcode = RISCV::Insn64;
-        break;
-      case 0b111'1111:
-        // TODO: Support Instructions > 64 bits.
+      // TODO: Support Instructions > 64 bits.
+      if (Length > 8)
         return Error(ErrorLoc,
-                     "instruction lengths over 64 bits are not supported");
-      }
-    if (Length != 0 && Length != EncodingDerivedLength)
-      return Error(ErrorLoc, "instruction length does not match the encoding");
-    if (!isUIntN(EncodingDerivedLength * 8, Value))
-      return Error(ErrorLoc, "encoding value does not fit into instruction");
+                    "instruction lengths over 64 bits are not supported");
+    }
+
+    // We only derive a length from the encoding for 16- and 32-bit instructions, as
+    // the encodings for longer instructions are not frozen in the spec.
+    int64_t EncodingDerivedLength = ((Value & 0b11) == 0b11) ? 4 : 2;
+
+    if (Length) {
+      // Only check the length against the encoding if the length is present and could match
+      if ((*Length <= 4) && (*Length != EncodingDerivedLength))
+        return Error(ErrorLoc, "instruction length does not match the encoding");
+
+      if (!isUIntN(*Length * 8, Value))
+        return Error(ErrorLoc, "encoding value does not fit into instruction");
+    } else {
+      if (!isUIntN(EncodingDerivedLength * 8, Value))
+        return Error(ErrorLoc, "encoding value does not fit into instruction");
+    }
+
     if (!AllowC && (EncodingDerivedLength == 2))
       return Error(ErrorLoc, "compressed instructions are not allowed");
 
@@ -3198,6 +3193,28 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
       return true;
     }
 
+
+    unsigned Opcode;
+    if (Length) {
+      switch (*Length) {
+      case 2:
+        Opcode = RISCV::Insn16;
+        break;
+      case 4:
+        Opcode = RISCV::Insn32;
+        break;
+      case 6:
+        Opcode = RISCV::Insn48;
+        break;
+      case 8:
+        Opcode = RISCV::Insn64;
+        break;
+      default:
+        llvm_unreachable("Error should have already been emitted");
+      }
+    } else
+      Opcode = (EncodingDerivedLength == 2) ? RISCV::Insn16 : RISCV::Insn32;
+
     emitToStreamer(getStreamer(), MCInstBuilder(Opcode).addImm(Value));
     return false;
   }
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index e32619976108ac..4622d616a3d577 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -29,32 +29,25 @@
 
 .insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
 
-.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
-.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
-.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
+.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
 
 .insn 10, 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
-.insn 0x000007f # CHECK: :[[@LINE]]:7: error: instruction lengths over 64 bits are not supported
 
 .insn 0x2, 0x03 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 .insn 0x2, 0x1f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 .insn 0x2, 0x3f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 
 .insn 0x4, 0x00000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x4, 0x0000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x4, 0x0000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
 
-.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x6, 0x000000000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x6, 0x00000000003f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
+.insn 0x6, 0x000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+.insn 0x8, 0x0000000000000001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
 
-.insn 0x8, 0x0000001 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x8, 0x0000013 # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-.insn 0x8, 0x000001f # CHECK: :[[@LINE]]:7: error: instruction length does not match the encoding
-
-.insn 0x2, 0xffff0001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
-.insn 0x4, 0xffff00000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
-.insn 0x6, 0xffff00000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x2, 0x10001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x4, 0x100000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x6, 0x100000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
+.insn 0x8, 0x1000000000000003f # CHECK: :[[@LINE]]:12: error: expected an integer constant
 
 .insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
 .insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index b9963dd70d36ff..e32fec25bb16b4 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -170,8 +170,8 @@ target:
 # CHECK-OBJ: <unknown>
 .insn 6, 0x1f
 
-# CHECK-ASM: .insn 0x6, 65503
-# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00,0x00,0x00]
+# CHECK-ASM: .insn 0x4, 65503
+# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00]
 # CHECK-OBJ: <unknown>
 .insn 0xffdf
 
@@ -180,7 +180,7 @@ target:
 # CHECK-OBJ: <unknown>
 .insn 8, 0x3f
 
-# CHECK-ASM: .insn 0x8, 65471
-# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00,0x00,0x00,0x00,0x00]
+# CHECK-ASM: .insn 0x4, 65471
+# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00]
 # CHECK-OBJ: <unknown>
 .insn 0xffbf

>From e1ed7ed8175298a3ffc6a501c0da35f333161170 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Thu, 26 Sep 2024 06:56:47 -0700
Subject: [PATCH 4/5] fixup! [RISCV][MC] Support Assembling 48- and 64-bit
 Instructions

---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp   | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index f42aac2816086d..fe38e04bc57b40 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3161,22 +3161,26 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
         return true;
 
       if (*Length == 0 || (*Length % 2) != 0)
-        return Error(ErrorLoc, "instruction lengths must be a non-zero multiple of two");
+        return Error(ErrorLoc,
+                     "instruction lengths must be a non-zero multiple of two");
 
       // TODO: Support Instructions > 64 bits.
       if (Length > 8)
         return Error(ErrorLoc,
-                    "instruction lengths over 64 bits are not supported");
+                     "instruction lengths over 64 bits are not supported");
     }
 
-    // We only derive a length from the encoding for 16- and 32-bit instructions, as
-    // the encodings for longer instructions are not frozen in the spec.
+    // We only derive a length from the encoding for 16- and 32-bit
+    // instructions, as the encodings for longer instructions are not frozen in
+    // the spec.
     int64_t EncodingDerivedLength = ((Value & 0b11) == 0b11) ? 4 : 2;
 
     if (Length) {
-      // Only check the length against the encoding if the length is present and could match
+      // Only check the length against the encoding if the length is present and
+      // could match
       if ((*Length <= 4) && (*Length != EncodingDerivedLength))
-        return Error(ErrorLoc, "instruction length does not match the encoding");
+        return Error(ErrorLoc,
+                     "instruction length does not match the encoding");
 
       if (!isUIntN(*Length * 8, Value))
         return Error(ErrorLoc, "encoding value does not fit into instruction");
@@ -3193,7 +3197,6 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
       return true;
     }
 
-
     unsigned Opcode;
     if (Length) {
       switch (*Length) {

>From 1e87d8d50668c7eec1e553e58426926582fd6de2 Mon Sep 17 00:00:00 2001
From: Sam Elliott <quic_aelliott at quicinc.com>
Date: Thu, 26 Sep 2024 23:03:03 +0100
Subject: [PATCH 5/5] fixup! Correctly deref optional

---
 llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index fe38e04bc57b40..f5519920b43f7a 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3165,7 +3165,7 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
                      "instruction lengths must be a non-zero multiple of two");
 
       // TODO: Support Instructions > 64 bits.
-      if (Length > 8)
+      if (*Length > 8)
         return Error(ErrorLoc,
                      "instruction lengths over 64 bits are not supported");
     }