[llvm] [RISCV] Support Expressions in .insn Directives (PR #111893)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 11:55:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Sam Elliott (lenary)
<details>
<summary>Changes</summary>
When assembling raw instructions, often you want to or together several
fields for clarity, or because you're using an assembly macro with
separate arguments.
This brings the use of `.insn` closer into line with what can be done
with the binutils assembler.
The 64-bit instruction test here explicitly sets the top bit to make
sure this works, even though the assembler is really using `int64_t`
in most places internally.
---
This is stacked on #<!-- -->111878, as testing this change is how I found the bug. Please only review the last commit in this PR.
---
Full diff: https://github.com/llvm/llvm-project/pull/111893.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp (+2-3)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp (+3-3)
- (modified) llvm/test/MC/RISCV/insn-invalid.s (+6-3)
- (modified) llvm/test/MC/RISCV/insn.s (+39-6)
``````````diff
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index e68674e830436f..d77ad02ec47bf1 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3172,12 +3172,11 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
// Try parsing .insn [ length , ] value
std::optional<int64_t> Length;
int64_t Value = 0;
- if (Parser.parseIntToken(
- Value, "expected instruction format or an integer constant"))
+ if (Parser.parseAbsoluteExpression(Value))
return true;
if (Parser.parseOptionalToken(AsmToken::Comma)) {
Length = Value;
- if (Parser.parseIntToken(Value, "expected an integer constant"))
+ if (Parser.parseAbsoluteExpression(Value))
return true;
if (*Length == 0 || (*Length % 2) != 0)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 66970ed37f2724..54f1a3899c4957 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -77,7 +77,7 @@ class RISCVMCCodeEmitter : public MCCodeEmitter {
/// Return binary encoding of operand. If the machine operand requires
/// relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -375,7 +375,7 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
-unsigned
+uint64_t
RISCVMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -384,7 +384,7 @@ RISCVMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
+ return MO.getImm();
llvm_unreachable("Unhandled expression!");
return 0;
diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s
index ef2f3c382972ab..40d57a2b6fddf1 100644
--- a/llvm/test/MC/RISCV/insn-invalid.s
+++ b/llvm/test/MC/RISCV/insn-invalid.s
@@ -24,8 +24,8 @@
# Make fake mnemonics we use to match these in the tablegened asm match table isn't exposed.
.insn_i 0x13, 0, a0, a1, 13, 14 # CHECK: :[[@LINE]]:1: error: unknown directive
-.insn . # CHECK: :[[@LINE]]:7: error: expected instruction format or an integer constant
-.insn 0x2, # CHECK: :[[@LINE]]:12: error: expected an integer constant
+.insn . # CHECK: :[[@LINE]]:7: error: expected absolute expression
+.insn 0x2, # CHECK: :[[@LINE]]:12: error: unknown token in expression
.insn 0x4, 0x13, 0 # CHECK: :[[@LINE]]:16: error: invalid operand for instruction
@@ -49,7 +49,10 @@
.insn 0x2, 0x10001 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
.insn 0x4, 0x100000003 # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
.insn 0x6, 0x100000000001f # CHECK: :[[@LINE]]:7: error: encoding value does not fit into instruction
-.insn 0x8, 0x1000000000000003f # CHECK: :[[@LINE]]:12: error: expected an integer constant
+.insn 0x8, 0x1000000000000003f # CHECK: :[[@LINE]]:12: error: literal value out of range for directive
.insn 0x0010 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
.insn 0x2, 0x0001 # CHECK: :[[@LINE]]:7: error: compressed instructions are not allowed
+
+.insn 0x2 + 0x2, 0x3 | (31 # CHECK: :[[@LINE]]:28: error: expected ')'
+.insn 0x4 * , 0xbf | (31 << 59) # CHECK: :[[@LINE]]:13: error: unknown token in expression
diff --git a/llvm/test/MC/RISCV/insn.s b/llvm/test/MC/RISCV/insn.s
index e32fec25bb16b4..829364c6328843 100644
--- a/llvm/test/MC/RISCV/insn.s
+++ b/llvm/test/MC/RISCV/insn.s
@@ -170,17 +170,50 @@ target:
# CHECK-OBJ: <unknown>
.insn 6, 0x1f
-# CHECK-ASM: .insn 0x4, 65503
-# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00]
-# CHECK-OBJ: <unknown>
-.insn 0xffdf
-
# CHECK-ASM: .insn 0x8, 63
# CHECK-ASM: encoding: [0x3f,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
# CHECK-OBJ: <unknown>
.insn 8, 0x3f
+# CHECK-ASM: .insn 0x6, 281474976710623
+# CHECK-ASM: encoding: [0xdf,0xff,0xff,0xff,0xff,0xff]
+# CHECK-OBJ: <unknown>
+.insn 0x6, 0xffffffffffdf
+
+# CHECK-ASM: .insn 0x8, -65
+# CHECK-ASM: encoding: [0xbf,0xff,0xff,0xff,0xff,0xff,0xff,0xff]
+# CHECK-OBJ: <unknown>
+.insn 0x8, 0xffffffffffffffbf
+
+# CHECK-ASM: .insn 0x4, 3971
+# CHECK-ASM: encoding: [0x83,0x0f,0x00,0x00]
+# CHECK-OBJ: lb t6, 0x0(zero)
+.insn 0x2 + 0x2, 0x3 | (31 << 7)
+
+# CHECK-ASM: .insn 0x8, -576460752303423297
+# CHECK-ASM: encoding: [0xbf,0x00,0x00,0x00,0x00,0x00,0x00,0xf8]
+# CHECK-OBJ: <unknown>
+.insn 0x4 * 0x2, 0xbf | (31 << 59)
+
+odd_lengths:
+# CHECK-ASM-LABEL: odd_lengths:
+# CHECK-OBJ-LABEL: <odd_lengths>:
+
+## These deliberately disagree with the lengths objdump expects them to have, so
+## keep them at the end so that the disassembled instruction stream is not out
+## of sync with the encoded instruction stream. We don't check for `<unknown>`
+## as we could get any number of those, so instead check for the encoding
+## halfwords. These might be split into odd 16-bit chunks, so each chunk is on
+## one line.
+
+# CHECK-ASM: .insn 0x4, 65503
+# CHECK-ASM: encoding: [0xdf,0xff,0x00,0x00]
+# CHECK-OBJ: ffdf
+# CHECK-OBJ: 0000
+.insn 0xffdf
+
# CHECK-ASM: .insn 0x4, 65471
# CHECK-ASM: encoding: [0xbf,0xff,0x00,0x00]
-# CHECK-OBJ: <unknown>
+# CHECK-OBJ: ffbf
+# CHECK-OBJ: 0000
.insn 0xffbf
``````````
</details>
https://github.com/llvm/llvm-project/pull/111893
More information about the llvm-commits
mailing list