[llvm] [RISCV] Fix QC.E.LI -> C.LI with Bare Symbol Compression (PR #146763)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 12:55:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Sam Elliott (lenary)
<details>
<summary>Changes</summary>
There's a long comment explaining this approach in RISCVInstrInfoXqci.td
This is presented as an alternative to llvm/llvm-project#<!-- -->146184.
---
Full diff: https://github.com/llvm/llvm-project/pull/146763.diff
11 Files Affected:
- (modified) llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp (+4)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp (+44-2)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp (+3)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h (+4)
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp (+2)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td (+79)
- (modified) llvm/test/MC/RISCV/Relocations/mc-dump.s (+1-1)
- (added) llvm/test/MC/RISCV/xqci-fixups.s (+36)
- (modified) llvm/test/MC/RISCV/xqcibi-relocations.s (+14-1)
- (modified) llvm/test/MC/RISCV/xqcilb-relocations.s (+16-1)
- (modified) llvm/test/MC/RISCV/xqcili-relocations.s (+16-1)
``````````diff
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 66f4aade380fab..bcdd6c574870c7 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1650,6 +1650,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSImm26:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 25),
(1 << 25) - 1);
+ // HACK: See comment before `BareSymbolQC_E_LI` in RISCVInstrInfoXqci.td.
+ case Match_InvalidBareSymbolQC_E_LI:
+ LLVM_FALLTHROUGH;
+ // END HACK
case Match_InvalidBareSImm32:
return generateImmOutOfRangeError(Operands, ErrorInfo,
std::numeric_limits<int32_t>::min(),
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 89a87798d71e49..4c579e23e2410e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -76,12 +76,13 @@ MCFixupKindInfo RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_riscv_branch", 0, 32, 0},
{"fixup_riscv_rvc_jump", 2, 11, 0},
{"fixup_riscv_rvc_branch", 0, 16, 0},
+ {"fixup_riscv_rvc_imm", 0, 16, 0},
{"fixup_riscv_call", 0, 64, 0},
{"fixup_riscv_call_plt", 0, 64, 0},
{"fixup_riscv_qc_e_branch", 0, 48, 0},
{"fixup_riscv_qc_e_32", 16, 32, 0},
- {"fixup_riscv_qc_abs20_u", 12, 20, 0},
+ {"fixup_riscv_qc_abs20_u", 0, 32, 0},
{"fixup_riscv_qc_e_call_plt", 0, 48, 0},
// Andes fixups
@@ -134,6 +135,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
// For jump instructions the immediate must be in the range
// [-1048576, 1048574]
return Offset > 1048574 || Offset < -1048576;
+ case RISCV::fixup_riscv_rvc_imm:
+ // This fixup can never be emitted as a relocation, so always needs to be
+ // relaxed.
+ return true;
}
}
@@ -152,6 +157,18 @@ static unsigned getRelaxedOpcode(unsigned Opcode, ArrayRef<MCOperand> Operands,
// This only relaxes one "step" - i.e. from C.J to JAL, not from C.J to
// QC.E.J, because we can always relax again if needed.
return RISCV::JAL;
+ case RISCV::C_LI:
+ if (!STI.hasFeature(RISCV::FeatureVendorXqcili))
+ break;
+ // We only need this because `QC.E.LI` can be compressed into a `C.LI`. This
+ // happens because the `simm6` MCOperandPredicate accepts bare symbols, and
+ // `QC.E.LI` is the only instruction that accepts bare symbols at parse-time
+ // and compresses to `C.LI`. `C.LI` does not itself accept bare symbols at
+ // parse time.
+ //
+ // If we have a bare symbol, we need to turn this back to a `QC.E.LI`, as we
+ // have no way to emit a relocation on a `C.LI` instruction.
+ return RISCV::QC_E_LI;
case RISCV::JAL: {
// We can only relax JAL if we have Xqcilb
if (!STI.hasFeature(RISCV::FeatureVendorXqcilb))
@@ -240,6 +257,23 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
Res.addOperand(Inst.getOperand(1));
break;
}
+ case RISCV::C_LI: {
+ // This should only be hit when trying to relax a `C.LI` into a `QC.E.LI`
+ // because the `C.LI` has a bare symbol. We cannot use
+ // `RISCVRVC::uncompress` because it will use decompression patterns. The
+ // `QC.E.LI` compression pattern to `C.LI` is compression-only (because we
+ // don't want `c.li` ever printed as `qc.e.li`, which might be done if the
+ // pattern applied to decompression), but that doesn't help much becuase
+ // `C.LI` with a bare symbol will decompress to an `ADDI` anyway (because
+ // `simm12`'s MCOperandPredicate accepts a bare symbol and that pattern
+ // comes first), and we still cannot emit an `ADDI` with a bare symbol.
+ assert(STI.hasFeature(RISCV::FeatureVendorXqcili) &&
+ "C.LI is only relaxable with Xqcili");
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode(), Inst.getOperands(), STI));
+ Res.addOperand(Inst.getOperand(0));
+ Res.addOperand(Inst.getOperand(1));
+ break;
+ }
case RISCV::BEQ:
case RISCV::BNE:
case RISCV::BLT:
@@ -539,10 +573,18 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
(Bit5 << 2);
return Value;
}
+ case RISCV::fixup_riscv_rvc_imm: {
+ if (!isInt<6>(Value))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ unsigned Bit5 = (Value >> 5) & 0x1;
+ unsigned Bit4_0 = Value & 0x1f;
+ Value = (Bit5 << 12) | (Bit4_0 << 2);
+ return Value;
+ }
case RISCV::fixup_riscv_qc_e_32: {
if (!isInt<32>(Value))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
- return ((Value & 0xffffffff) << 16);
+ return Value & 0xffffffffu;
}
case RISCV::fixup_riscv_qc_abs20_u: {
if (!isInt<20>(Value))
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 8ab2c56ae3178d..146e3ab2fc38d9 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -135,6 +135,9 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup,
return ELF::R_RISCV_LO12_I;
case RISCV::fixup_riscv_lo12_s:
return ELF::R_RISCV_LO12_S;
+ case RISCV::fixup_riscv_rvc_imm:
+ reportError(Fixup.getLoc(), "No relocation for CI-type instructions");
+ return ELF::R_RISCV_NONE;
case RISCV::fixup_riscv_qc_e_32:
return ELF::R_RISCV_QC_E_32;
case RISCV::fixup_riscv_qc_abs20_u:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index c1cdf511fae5b5..f816561ccf3ff5 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -40,12 +40,16 @@ enum Fixups {
fixup_riscv_rvc_jump,
// 8-bit fixup for symbol references in the compressed branch instruction
fixup_riscv_rvc_branch,
+ // 6-bit fixup for symbol references in instructions like c.li
+ fixup_riscv_rvc_imm,
// Fixup representing a legacy no-pic function call attached to the auipc
// instruction in a pair composed of adjacent auipc+jalr instructions.
fixup_riscv_call,
// Fixup representing a function call attached to the auipc instruction in a
// pair composed of adjacent auipc+jalr instructions.
fixup_riscv_call_plt,
+
+ // Qualcomm specific fixups
// 12-bit fixup for symbol references in the 48-bit Xqcibi branch immediate
// instructions
fixup_riscv_qc_e_branch,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 2ed7cd9f008a63..cbeabdddb93716 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -650,6 +650,8 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
FixupKind = RISCV::fixup_riscv_rvc_jump;
} else if (MIFrm == RISCVII::InstFormatCB) {
FixupKind = RISCV::fixup_riscv_rvc_branch;
+ } else if (MIFrm == RISCVII::InstFormatCI) {
+ FixupKind = RISCV::fixup_riscv_rvc_imm;
} else if (MIFrm == RISCVII::InstFormatI) {
FixupKind = RISCV::fixup_riscv_12_i;
} else if (MIFrm == RISCVII::InstFormatQC_EB) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index b2bf09028bc40b..67ba2e6b8eb336 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1595,3 +1595,82 @@ def : CompressPat<(QC_E_BGEUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0
def : CompressPat<(QC_E_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12),
(QC_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12)>;
} // let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32]
+
+// HACKS
+// -----
+// The reasons for needing the definitions below are long and quite annoying. I'm writing
+// this so they are explained in-line, rather than anywhere else.
+//
+// Emitting an instruction to an object proceeds as:
+// - Compression (in emitInstruction)
+// - Emit to Binary Code + Fixups
+// - Assembler Relaxation
+// - Fixup evaluation/application
+// - If relaxed, re-emitted to Binary + Fixups
+// - Relocation generation from Fixups
+//
+// Unfortunately, the `QC.E.LI` -> `C.LI` compression pattern has an edge case that has
+// caused crashes in the past.
+//
+// How the bug happens is:
+// - QC.E.LI is parsed with a bare symbol, which is valid + expected, and can
+// be handled by fixups/relocations.
+// - Compression turns this into a `C.LI` because the `simm6`
+// MCOperandPredicate accepts bare symbols.
+// - Binary Code emission didn't know how to create a fixup for a CI-type
+// instruction containing a bare symbol.
+//
+// The solution to the last bullet is that we added the `fixup_riscv_rvc_imm`,
+// so that we could proceed past the last error, and then use Assembler Relaxation
+// to turn the `C.LI` with a bare symbol back into a `QC.E.LI`.
+//
+// This is good enough for emitting objects, but doesn't work for emitting
+// assembly. Emitting assembly is why we need the following Hacks.
+//
+// Emitting an instruction to assembly proceeds as:
+// - Compression (in emitInstruction)
+// - Decompression (in RISCVInstPrinter::printInst)
+// - InstAliases are applied
+//
+// So in the case of `QC.E.LI` with a bare symbol, first it is compressed to
+// `C.LI` with a bare symbol, and then it is decompressed to `ADDI` with a bare
+// symbol for printing, which is printed via an alias as `li <reg>, <symbol>`.
+// Both the decompression and the alias use the MCOperandPredicate from
+// `simm12`, which accepts bare symbols.
+//
+// The problem here is that `li <reg>, <symbol>` fails to parse, because the
+// parsers do not accept bare symbols, they only accept symbols with specifiers
+// or immediates.
+//
+// Our solution is to add another alias, which will be prioritised above the
+// `li` alias, but only when `qc.e.li` is available. We originally intended to
+// use the `bare_symbol` Operand type, but this had no MCOperandPredicate, and
+// adding one changed the error messages when parsing `qc.e.li` with a
+// too-large constant. So instead, we add a new `AsmOperand` and `Operand` type,
+// just for the alias, which parse just like a BareSymbol, but they
+// have both an MCOperandPredicate, and the error message that corresponds to
+// the existing one on `qc.e.li` for too-large immediates (which fail to parse
+// as both an immediate, and a bare symbol).
+//
+// This is fairly unpleasant, but it's the least disruptive thing we can do
+// and keeps all the hacks confined to the RISC-V backend code.
+
+def BareSymbolQC_E_LI : AsmOperandClass {
+ let Name = "BareSymbolQC_E_LI";
+ let PredicateMethod = "isBareSymbol";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidBareSymbolQC_E_LI";
+ let ParserMethod = "parseBareSymbol";
+}
+
+def hack_bare_symbol_qc_e_li : Operand<XLenVT> {
+ let ParserMatchClass = BareSymbolQC_E_LI;
+ let MCOperandPredicate = [{
+ return MCOp.isExpr() && MCOp.isBareSymbolRef();
+ }];
+}
+
+let Predicates = [HasVendorXqcili, IsRV32] in {
+def : InstAlias<"qc.e.li $rd, $sym", (ADDI GPR:$rd, X0, hack_bare_symbol_qc_e_li:$sym), 3>;
+} // Predicates = [HasVendorXqcili, IsRV32]
+// END HACKS
diff --git a/llvm/test/MC/RISCV/Relocations/mc-dump.s b/llvm/test/MC/RISCV/Relocations/mc-dump.s
index 98205925da1cd7..6da5ce3a151cf7 100644
--- a/llvm/test/MC/RISCV/Relocations/mc-dump.s
+++ b/llvm/test/MC/RISCV/Relocations/mc-dump.s
@@ -7,7 +7,7 @@
# CHECK-NEXT: Symbol @0 .text
# CHECK-NEXT:0 Align Align:4 Value:0 ValueSize:1 MaxBytesToEmit:4 Nops
# CHECK-NEXT:0 Data LinkerRelaxable Size:8 [97,00,00,00,e7,80,00,00]
-# CHECK-NEXT: Fixup @0 Value:specifier(19,ext) Kind:4022
+# CHECK-NEXT: Fixup @0 Value:specifier(19,ext) Kind:4023
# CHECK-NEXT: Symbol @0 $x
# CHECK-NEXT:8 Align Align:8 Value:0 ValueSize:1 MaxBytesToEmit:8 Nops
# CHECK-NEXT:12 Data Size:4 [13,05,30,00]
diff --git a/llvm/test/MC/RISCV/xqci-fixups.s b/llvm/test/MC/RISCV/xqci-fixups.s
new file mode 100644
index 00000000000000..410126d9fd8574
--- /dev/null
+++ b/llvm/test/MC/RISCV/xqci-fixups.s
@@ -0,0 +1,36 @@
+# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
+# RUN: --mattr=+experimental-xqcili,+experimental-xqcilb,+experimental-xqcibi \
+# RUN: -riscv-add-build-attributes \
+# RUN: | llvm-objdump --no-print-imm-hex -M no-aliases -d - \
+# RUN: | FileCheck -check-prefix=CHECK-INSTR %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 %s \
+# RUN: --mattr=+experimental-xqcili,+experimental-xqcilb,+experimental-xqcibi \
+# RUN: | llvm-readobj -r - | FileCheck %s -check-prefix=CHECK-REL
+
+## This checks that, if the assembler can resolve the qc fixup, that the fixup
+## is applied correctly to the instruction.
+
+.L0:
+# CHECK-INSTR: qc.e.beqi a0, 64, 0x0
+qc.e.beqi a0, 64, .L0
+# CHECK-INSTR: qc.e.j 0x10000016
+qc.e.j func
+# CHECK-INSTR: qc.e.li a0, 8
+qc.e.li a0, abs_sym
+# CHECK-INSTR: qc.li a0, 8
+qc.li a0, %qc.abs20(abs_sym)
+
+
+
+# This has to come after the instructions that use it or it will
+# be evaluated at parse-time (avoiding fixups)
+abs_sym = 8
+
+
+.space 0x10000000
+func:
+ ret
+
+## All these fixups should be resolved by the assembler without emitting
+## relocations.
+# CHECK-REL-NOT: R_RISCV
diff --git a/llvm/test/MC/RISCV/xqcibi-relocations.s b/llvm/test/MC/RISCV/xqcibi-relocations.s
index 0f7cc8c5787a1e..931cd7c9314bb8 100644
--- a/llvm/test/MC/RISCV/xqcibi-relocations.s
+++ b/llvm/test/MC/RISCV/xqcibi-relocations.s
@@ -84,7 +84,20 @@ qc.bnei t3, 14, same_section
# OBJ-NEXT: qc.e.bgeui s2, 0x18, 0x28 <same_section>
qc.e.bgeui s2, 24, same_section
-.option norelax
+## Enable compression/relaxation to check how symbols are handled.
+.option noexact
+
+# ASM: qc.bnei t1, 10, undef
+# OBJ: qc.beqi t1, 0xa, 0x42 <same_section_extern+0x16>
+# OBJ-NEXT: j 0x3e <same_section_extern+0x12>
+# OBJ-NEXT: R_RISCV_JAL undef{{$}}
+qc.bnei t1, 10, undef
+
+# ASM: qc.e.bgeui s0, 40, undef
+# OBJ-NEXT: qc.e.bltui s0, 0x28, 0x4c <same_section_extern+0x20>
+# OBJ-NEXT: j 0x48 <same_section_extern+0x1c>
+# OBJ-NEXT: R_RISCV_JAL undef{{$}}
+qc.e.bgeui s0, 40, undef
.section .text.second, "ax", @progbits
diff --git a/llvm/test/MC/RISCV/xqcilb-relocations.s b/llvm/test/MC/RISCV/xqcilb-relocations.s
index ab08beed9be94d..48c8c6931c8af0 100644
--- a/llvm/test/MC/RISCV/xqcilb-relocations.s
+++ b/llvm/test/MC/RISCV/xqcilb-relocations.s
@@ -92,7 +92,22 @@ qc.e.j same_section
# OBJ-NEXT: R_RISCV_RELAX
qc.e.jal same_section
-.option norelax
+## Enable compression/relaxation to check how symbols are handled.
+.option noexact
+
+qc.e.j undef
+# ASM: j undef
+# OBJ: qc.e.j 0x44 <same_section_extern+0x10>
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
+
+qc.e.jal undef
+# ASM: jal undef
+# OBJ: qc.e.jal 0x4a <same_section_extern+0x16>
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
.section .text.other, "ax", @progbits
diff --git a/llvm/test/MC/RISCV/xqcili-relocations.s b/llvm/test/MC/RISCV/xqcili-relocations.s
index 866586236fa46b..7eff61fc782d89 100644
--- a/llvm/test/MC/RISCV/xqcili-relocations.s
+++ b/llvm/test/MC/RISCV/xqcili-relocations.s
@@ -97,7 +97,22 @@ qc.li a1, %qc.abs20(undef)
# OBJ-NEXT: R_RISCV_RELAX
qc.e.li s1, undef
-.option norelax
+## Enable compression/relaxation to check how symbols are handled.
+.option noexact
+
+# ASM: qc.li a1, %qc.abs20(undef)
+# OBJ-NEXT: qc.li a1, 0x0
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM192 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
+qc.li a1, %qc.abs20(undef)
+
+# ASM: qc.e.li a2, undef
+# OBJ-NEXT: qc.e.li a2, 0x0
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM194 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
+qc.e.li a2, undef
.section .text.other, "ax", @progbits
``````````
</details>
https://github.com/llvm/llvm-project/pull/146763
More information about the llvm-commits
mailing list