[llvm] [RISCV][MC] Add support for hardcode encoding of .insn directive (PR #98030)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 07:39:07 PDT 2024


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/98030

This patch adds support for the following two hardcoded-encoding forms of the .insn directive:
```
.insn <insn-length>, <value>
.insn <value>
```

See also gas's patch https://github.com/bminor/binutils-gdb/commit/a262b82fdbf4cda3b0648b1adc32245ca3f78b7a
NOTE: This patch doesn't support long instructions; only 16-bit and 32-bit encodings are accepted.

Closes https://github.com/llvm/llvm-project/issues/97498.


>From f094226e52d753652af0951b703f7c2e699a6ead Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 8 Jul 2024 22:27:21 +0800
Subject: [PATCH] [RISCV][MC] Add support for hardcode form of .insn directive

---
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 42 +++++++++++++++++--
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |  2 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |  8 ++++
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td      |  6 +++
 llvm/test/MC/RISCV/insn-invalid.s             |  8 ++++
 llvm/test/MC/RISCV/insn.s                     | 10 +++++
 llvm/test/MC/RISCV/insn_c-invalid.s           |  1 +
 llvm/test/MC/RISCV/insn_c.s                   | 10 +++++
 8 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 8ac1cdf0a7a9c..d599c2b9b7b30 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -703,7 +703,9 @@ struct RISCVOperand final : public MCParsedAsmOperand {
   bool isUImm6() const { return IsUImm<6>(); }
   bool isUImm7() const { return IsUImm<7>(); }
   bool isUImm8() const { return IsUImm<8>(); }
+  bool isUImm16() const { return IsUImm<16>(); }
   bool isUImm20() const { return IsUImm<20>(); }
+  bool isUImm32() const { return IsUImm<32>(); }
 
   bool isUImm8GE32() const {
     int64_t Imm;
@@ -3055,17 +3057,49 @@ bool isValidInsnFormat(StringRef Format, bool AllowC) {
 
 /// parseDirectiveInsn
 /// ::= .insn [ format encoding, (operands (, operands)*) ]
+/// ::= .insn [ length, value ]
+/// ::= .insn [ value ]
 bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
   MCAsmParser &Parser = getParser();
 
+  bool AllowC = getSTI().hasFeature(RISCV::FeatureStdExtC) ||
+                getSTI().hasFeature(RISCV::FeatureStdExtZca);
+
   // Expect instruction format as identifier.
   StringRef Format;
   SMLoc ErrorLoc = Parser.getTok().getLoc();
-  if (Parser.parseIdentifier(Format))
-    return Error(ErrorLoc, "expected instruction format");
+  if (Parser.parseIdentifier(Format)) {
+    // Try parsing .insn [length], value
+    int64_t Length = 0;
+    int64_t Value = 0;
+    if (Parser.parseIntToken(
+            Value, "expected instruction format or an integer constant"))
+      return true;
+    if (Parser.parseOptionalToken(AsmToken::Comma)) {
+      Length = Value;
+      if (Parser.parseIntToken(Value, "expected an integer constant"))
+        return true;
+    }
+
+    int64_t RealLength = (Value & 3) == 3 ? 4 : 2;
+    if (!isUIntN(RealLength * 8, Value))
+      return Error(ErrorLoc, "invalid operand for instruction");
+    if (RealLength == 2 && !AllowC)
+      return Error(ErrorLoc, "compressed instructions are not allowed");
+    if (Length != 0 && Length != RealLength)
+      return Error(ErrorLoc, "instruction length mismatch");
+
+    if (getParser().parseEOL("invalid operand for instruction")) {
+      getParser().eatToEndOfStatement();
+      return true;
+    }
+
+    emitToStreamer(getStreamer(), MCInstBuilder(RealLength == 2 ? RISCV::Insn16
+                                                                : RISCV::Insn32)
+                                      .addImm(Value));
+    return false;
+  }
 
-  bool AllowC = getSTI().hasFeature(RISCV::FeatureStdExtC) ||
-                getSTI().hasFeature(RISCV::FeatureStdExtZca);
   if (!isValidInsnFormat(Format, AllowC))
     return Error(ErrorLoc, "invalid instruction format");
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 550904516ac8e..cf83bd977939e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -279,6 +279,8 @@ enum OperandType : unsigned {
   OPERAND_UIMM9_LSB000,
   OPERAND_UIMM10_LSB00_NONZERO,
   OPERAND_UIMM12,
+  OPERAND_UIMM16,
+  OPERAND_UIMM32,
   OPERAND_ZERO,
   OPERAND_SIMM5,
   OPERAND_SIMM5_PLUS1,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 4cdf08a46f285..075aaae2ce507 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -233,6 +233,8 @@ def uimm7_opcode : RISCVUImmOp<7> {
 }
 def uimm7 : RISCVUImmOp<7>;
 def uimm8 : RISCVUImmOp<8>;
+def uimm16 : RISCVUImmOp<16>;
+def uimm32 : RISCVUImmOp<32>;
 def simm12 : RISCVSImmLeafOp<12> {
   let MCOperandPredicate = [{
     int64_t Imm;
@@ -1120,6 +1122,12 @@ def InsnS : DirectiveInsnS<(outs), (ins uimm7_opcode:$opcode, uimm3:$funct3,
                                         AnyReg:$rs2, AnyReg:$rs1,
                                         simm12:$imm12),
                            "$opcode, $funct3, $rs2, ${imm12}(${rs1})">;
+def Insn32 : RVInst<(outs), (ins uimm32:$value), "", "", [], InstFormatOther> {
+  bits<32> value;
+
+  let Inst{31-0} = value;
+  let AsmString = ".insn 0x4, $value";
+}
 }
 
 // Use InstAliases to match these so that we can combine the insn and format
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 458d081763e93..82eeef0ae10a3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -801,6 +801,12 @@ def InsnCJ : DirectiveInsnCJ<(outs), (ins uimm2_opcode:$opcode,
                                           uimm3:$funct3,
                                           simm12_lsb0:$imm11),
                              "$opcode, $funct3, $imm11">;
+def Insn16 : RVInst16<(outs), (ins uimm16:$value), "", "", [], InstFormatOther> {
+  bits<16> value;
+
+  let Inst{15-0} = value;
+  let AsmString = ".insn 0x2, $value";
+}
 }
 
 // Use InstAliases to match these so that we can combine the insn and format
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index 32ebd6867377c..d6fabea4e1701 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -23,3 +23,11 @@
 
 # Make fake mnemonics we use to match these in the tablegened asm match table isn't exposed.
 .insn_i  0x13,  0,  a0, a1, 13, 14 # CHECK: :[[@LINE]]:1: error: unknown directive
+
+.insn . # CHECK: :[[@LINE]]:7: error: expected instruction format or an integer constant
+.insn 0x2, # CHECK: :[[@LINE]]:12: error: expected an integer constant
+.insn 0x2, 0xffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
+.insn 0x2, 0xffffffff # CHECK: :[[@LINE]]:7: error: instruction length mismatch
+.insn 0xffffffffff # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
+.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+.insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index 4eb9a8a7ebf25..b95c3b87b442f 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -154,3 +154,13 @@ target:
 # CHECK-ASM: encoding: [0x03,0xd3,0x03,0x80]
 # CHECK-OBJ: lhu t1, -0x800(t2)
 .insn i LOAD, 0x5, x6, %lo(2048)(x7)
+
+# CHECK-ASM: .insn 0x4, 19
+# CHECK-ASM: encoding: [0x13,0x00,0x00,0x00]
+# CHECK-OBJ: addi zero, zero, 0x0
+.insn 0x13
+
+# CHECK-ASM: .insn 0x4, 19
+# CHECK-ASM: encoding: [0x13,0x00,0x00,0x00]
+# CHECK-OBJ: addi zero, zero, 0x0
+.insn 0x4, 0x13
diff --git a/llvm/test/MC/RISCV/insn_c-invalid.s b/llvm/test/MC/RISCV/insn_c-invalid.s
index c983d32e7fe97..3b424b2a9fd32 100644
--- a/llvm/test/MC/RISCV/insn_c-invalid.s
+++ b/llvm/test/MC/RISCV/insn_c-invalid.s
@@ -24,3 +24,4 @@
 ## Make fake mnemonics we use to match these in the tablegened asm match table isn't exposed.
 .insn_cr  2, 9, a0, a1 # CHECK: :[[#@LINE]]:1: error: unknown directive
 
+.insn 0xfffffff0 # CHECK: :[[@LINE]]:7: error: invalid operand for instruction
diff --git a/llvm/test/MC/RISCV/insn_c.s b/llvm/test/MC/RISCV/insn_c.s
index e1b3003e0200d..19169e8b08c94 100644
--- a/llvm/test/MC/RISCV/insn_c.s
+++ b/llvm/test/MC/RISCV/insn_c.s
@@ -80,3 +80,13 @@ target:
 # CHECK-ASM: encoding: [0bAAAAAA01,0b101AAAAA]
 # CHECK-OBJ: c.j 0x0 <target>
 .insn cj  1, 5, target
+
+# CHECK-ASM: .insn 0x2, 1
+# CHECK-ASM: encoding: [0x01,0x00]
+# CHECK-OBJ: c.nop
+.insn 0x0001
+
+# CHECK-ASM: .insn 0x2, 1
+# CHECK-ASM: encoding: [0x01,0x00]
+# CHECK-OBJ: c.nop
+.insn 0x2, 0x0001



More information about the llvm-commits mailing list