[llvm] [RISCV] Fix QC.E.LI -> C.LI with Bare Symbol Compression (PR #146763)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 10 12:55:55 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Sam Elliott (lenary)

<details>
<summary>Changes</summary>

There's a long comment explaining this approach in RISCVInstrInfoXqci.td

This is presented as an alternative to llvm/llvm-project#<!-- -->146184.

---
Full diff: https://github.com/llvm/llvm-project/pull/146763.diff


11 Files Affected:

- (modified) llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp (+4) 
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp (+44-2) 
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp (+3) 
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h (+4) 
- (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp (+2) 
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td (+79) 
- (modified) llvm/test/MC/RISCV/Relocations/mc-dump.s (+1-1) 
- (added) llvm/test/MC/RISCV/xqci-fixups.s (+36) 
- (modified) llvm/test/MC/RISCV/xqcibi-relocations.s (+14-1) 
- (modified) llvm/test/MC/RISCV/xqcilb-relocations.s (+16-1) 
- (modified) llvm/test/MC/RISCV/xqcili-relocations.s (+16-1) 


``````````diff
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 66f4aade380fab..bcdd6c574870c7 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1650,6 +1650,10 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidSImm26:
     return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 25),
                                       (1 << 25) - 1);
+  // HACK: See comment before `BareSymbolQC_E_LI` in RISCVInstrInfoXqci.td.
+  case Match_InvalidBareSymbolQC_E_LI:
+    LLVM_FALLTHROUGH;
+  // END HACK
   case Match_InvalidBareSImm32:
     return generateImmOutOfRangeError(Operands, ErrorInfo,
                                       std::numeric_limits<int32_t>::min(),
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 89a87798d71e49..4c579e23e2410e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -76,12 +76,13 @@ MCFixupKindInfo RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_riscv_branch", 0, 32, 0},
       {"fixup_riscv_rvc_jump", 2, 11, 0},
       {"fixup_riscv_rvc_branch", 0, 16, 0},
+      {"fixup_riscv_rvc_imm", 0, 16, 0},
       {"fixup_riscv_call", 0, 64, 0},
       {"fixup_riscv_call_plt", 0, 64, 0},
 
       {"fixup_riscv_qc_e_branch", 0, 48, 0},
       {"fixup_riscv_qc_e_32", 16, 32, 0},
-      {"fixup_riscv_qc_abs20_u", 12, 20, 0},
+      {"fixup_riscv_qc_abs20_u", 0, 32, 0},
       {"fixup_riscv_qc_e_call_plt", 0, 48, 0},
 
       // Andes fixups
@@ -134,6 +135,10 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
     // For jump instructions the immediate must be in the range
     // [-1048576, 1048574]
     return Offset > 1048574 || Offset < -1048576;
+  case RISCV::fixup_riscv_rvc_imm:
+    // This fixup can never be emitted as a relocation, so always needs to be
+    // relaxed.
+    return true;
   }
 }
 
@@ -152,6 +157,18 @@ static unsigned getRelaxedOpcode(unsigned Opcode, ArrayRef<MCOperand> Operands,
     // This only relaxes one "step" - i.e. from C.J to JAL, not from C.J to
     // QC.E.J, because we can always relax again if needed.
     return RISCV::JAL;
+  case RISCV::C_LI:
+    if (!STI.hasFeature(RISCV::FeatureVendorXqcili))
+      break;
+    // We only need this because `QC.E.LI` can be compressed into a `C.LI`. This
+    // happens because the `simm6` MCOperandPredicate accepts bare symbols, and
+    // `QC.E.LI` is the only instruction that accepts bare symbols at parse-time
+    // and compresses to `C.LI`. `C.LI` does not itself accept bare symbols at
+    // parse time.
+    //
+    // If we have a bare symbol, we need to turn this back to a `QC.E.LI`, as we
+    // have no way to emit a relocation on a `C.LI` instruction.
+    return RISCV::QC_E_LI;
   case RISCV::JAL: {
     // We can only relax JAL if we have Xqcilb
     if (!STI.hasFeature(RISCV::FeatureVendorXqcilb))
@@ -240,6 +257,23 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
     Res.addOperand(Inst.getOperand(1));
     break;
   }
+  case RISCV::C_LI: {
+    // This should only be hit when trying to relax a `C.LI` into a `QC.E.LI`
+    // because the `C.LI` has a bare symbol. We cannot use
+    // `RISCVRVC::uncompress` because it will use decompression patterns. The
+    // `QC.E.LI` compression pattern to `C.LI` is compression-only (because we
+    // don't want `c.li` ever printed as `qc.e.li`, which might be done if the
+    // pattern applied to decompression), but that doesn't help much becuase
+    // `C.LI` with a bare symbol will decompress to an `ADDI` anyway (because
+    // `simm12`'s MCOperandPredicate accepts a bare symbol and that pattern
+    // comes first), and we still cannot emit an `ADDI` with a bare symbol.
+    assert(STI.hasFeature(RISCV::FeatureVendorXqcili) &&
+           "C.LI is only relaxable with Xqcili");
+    Res.setOpcode(getRelaxedOpcode(Inst.getOpcode(), Inst.getOperands(), STI));
+    Res.addOperand(Inst.getOperand(0));
+    Res.addOperand(Inst.getOperand(1));
+    break;
+  }
   case RISCV::BEQ:
   case RISCV::BNE:
   case RISCV::BLT:
@@ -539,10 +573,18 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
             (Bit5 << 2);
     return Value;
   }
+  case RISCV::fixup_riscv_rvc_imm: {
+    if (!isInt<6>(Value))
+      Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+    unsigned Bit5 = (Value >> 5) & 0x1;
+    unsigned Bit4_0 = Value & 0x1f;
+    Value = (Bit5 << 12) | (Bit4_0 << 2);
+    return Value;
+  }
   case RISCV::fixup_riscv_qc_e_32: {
     if (!isInt<32>(Value))
       Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
-    return ((Value & 0xffffffff) << 16);
+    return Value & 0xffffffffu;
   }
   case RISCV::fixup_riscv_qc_abs20_u: {
     if (!isInt<20>(Value))
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 8ab2c56ae3178d..146e3ab2fc38d9 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -135,6 +135,9 @@ unsigned RISCVELFObjectWriter::getRelocType(const MCFixup &Fixup,
     return ELF::R_RISCV_LO12_I;
   case RISCV::fixup_riscv_lo12_s:
     return ELF::R_RISCV_LO12_S;
+  case RISCV::fixup_riscv_rvc_imm:
+    reportError(Fixup.getLoc(), "No relocation for CI-type instructions");
+    return ELF::R_RISCV_NONE;
   case RISCV::fixup_riscv_qc_e_32:
     return ELF::R_RISCV_QC_E_32;
   case RISCV::fixup_riscv_qc_abs20_u:
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index c1cdf511fae5b5..f816561ccf3ff5 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -40,12 +40,16 @@ enum Fixups {
   fixup_riscv_rvc_jump,
   // 8-bit fixup for symbol references in the compressed branch instruction
   fixup_riscv_rvc_branch,
+  // 6-bit fixup for symbol references in instructions like c.li
+  fixup_riscv_rvc_imm,
   // Fixup representing a legacy no-pic function call attached to the auipc
   // instruction in a pair composed of adjacent auipc+jalr instructions.
   fixup_riscv_call,
   // Fixup representing a function call attached to the auipc instruction in a
   // pair composed of adjacent auipc+jalr instructions.
   fixup_riscv_call_plt,
+
+  // Qualcomm specific fixups
   // 12-bit fixup for symbol references in the 48-bit Xqcibi branch immediate
   // instructions
   fixup_riscv_qc_e_branch,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 2ed7cd9f008a63..cbeabdddb93716 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -650,6 +650,8 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
       FixupKind = RISCV::fixup_riscv_rvc_jump;
     } else if (MIFrm == RISCVII::InstFormatCB) {
       FixupKind = RISCV::fixup_riscv_rvc_branch;
+    } else if (MIFrm == RISCVII::InstFormatCI) {
+      FixupKind = RISCV::fixup_riscv_rvc_imm;
     } else if (MIFrm == RISCVII::InstFormatI) {
       FixupKind = RISCV::fixup_riscv_12_i;
     } else if (MIFrm == RISCVII::InstFormatQC_EB) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index b2bf09028bc40b..67ba2e6b8eb336 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1595,3 +1595,82 @@ def : CompressPat<(QC_E_BGEUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0
 def : CompressPat<(QC_E_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12),
                   (QC_BLTUI GPRNoX0:$rs1, uimm5nonzero:$imm5, bare_simm13_lsb0:$imm12)>;
 } // let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32]
+
+// HACKS
+// -----
+// The reasons for needing the definitions below are long and quite annoying. I'm writing
+// this so they are explained in-line, rather than anywhere else.
+//
+// Emitting an instruction to an object proceeds as:
+// - Compression (in emitInstruction)
+// - Emit to Binary Code + Fixups
+// - Assembler Relaxation
+//   - Fixup evaluation/application
+//   - If relaxed, re-emitted to Binary + Fixups
+// - Relocation generation from Fixups
+//
+// Unfortunately, the `QC.E.LI` -> `C.LI` compression pattern has an edge case that has
+// caused crashes in the past.
+//
+// How the bug happens is:
+// - QC.E.LI is parsed with a bare symbol, which is valid + expected, and can
+//   be handled by fixups/relocations.
+// - Compression turns this into a `C.LI` because the `simm6` 
+//   MCOperandPredicate accepts bare symbols.
+// - Binary Code emission didn't know how to create a fixup for a CI-type
+//   instruction containing a bare symbol.
+//
+// The solution to the last bullet is that we added the `fixup_riscv_rvc_imm`,
+// so that we could proceed past the last error, and then use Assembler Relaxation
+// to turn the `C.LI` with a bare symbol back into a `QC.E.LI`.
+//
+// This is good enough for emitting objects, but doesn't work for emitting
+// assembly. Emitting assembly is why we need the following Hacks.
+// 
+// Emitting an instruction to assembly proceeds as:
+// - Compression (in emitInstruction)
+// - Decompression (in RISCVInstPrinter::printInst)
+// - InstAliases are applied
+//
+// So in the case of `QC.E.LI` with a bare symbol, first it is compressed to
+// `C.LI` with a bare symbol, and then it is decompressed to `ADDI` with a bare
+// symbol for printing, which is printed via an alias as `li <reg>, <symbol>`.
+// Both the decompression and the alias use the MCOperandPredicate from 
+// `simm12`, which accepts bare symbols.
+//
+// The problem here is that `li <reg>, <symbol>` fails to parse, because the
+// parsers do not accept bare symbols, they only accept symbols with specifiers
+// or immediates.
+//
+// Our solution is to add another alias, which will be prioritised above the
+// `li` alias, but only when `qc.e.li` is available. We originally intended to
+// use the `bare_symbol` Operand type, but this had no MCOperandPredicate, and
+// adding one changed the error messages when parsing `qc.e.li` with a
+// too-large constant. So instead, we add a new `AsmOperand` and `Operand` type,
+// just for the alias, which parse just like a BareSymbol, but they
+// have both an MCOperandPredicate, and the error message that corresponds to
+// the existing one on `qc.e.li` for too-large immediates (which fail to parse
+// as both an immediate, and a bare symbol).
+//
+// This is fairly unpleasant, but it's the least disruptive thing we can do
+// and keeps all the hacks confined to the RISC-V backend code.
+
+def BareSymbolQC_E_LI : AsmOperandClass {
+  let Name = "BareSymbolQC_E_LI";
+  let PredicateMethod = "isBareSymbol";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "InvalidBareSymbolQC_E_LI";
+  let ParserMethod = "parseBareSymbol";
+}
+
+def hack_bare_symbol_qc_e_li : Operand<XLenVT> {
+  let ParserMatchClass = BareSymbolQC_E_LI;
+  let MCOperandPredicate = [{
+    return MCOp.isExpr() && MCOp.isBareSymbolRef();
+  }];
+}
+
+let Predicates = [HasVendorXqcili, IsRV32] in {
+def : InstAlias<"qc.e.li $rd, $sym", (ADDI GPR:$rd, X0, hack_bare_symbol_qc_e_li:$sym), 3>;
+} // Predicates = [HasVendorXqcili, IsRV32]
+// END HACKS
diff --git a/llvm/test/MC/RISCV/Relocations/mc-dump.s b/llvm/test/MC/RISCV/Relocations/mc-dump.s
index 98205925da1cd7..6da5ce3a151cf7 100644
--- a/llvm/test/MC/RISCV/Relocations/mc-dump.s
+++ b/llvm/test/MC/RISCV/Relocations/mc-dump.s
@@ -7,7 +7,7 @@
 # CHECK-NEXT:  Symbol @0 .text
 # CHECK-NEXT:0 Align Align:4 Value:0 ValueSize:1 MaxBytesToEmit:4 Nops
 # CHECK-NEXT:0 Data LinkerRelaxable Size:8 [97,00,00,00,e7,80,00,00]
-# CHECK-NEXT:  Fixup @0 Value:specifier(19,ext) Kind:4022
+# CHECK-NEXT:  Fixup @0 Value:specifier(19,ext) Kind:4023
 # CHECK-NEXT:  Symbol @0 $x
 # CHECK-NEXT:8 Align Align:8 Value:0 ValueSize:1 MaxBytesToEmit:8 Nops
 # CHECK-NEXT:12 Data Size:4 [13,05,30,00]
diff --git a/llvm/test/MC/RISCV/xqci-fixups.s b/llvm/test/MC/RISCV/xqci-fixups.s
new file mode 100644
index 00000000000000..410126d9fd8574
--- /dev/null
+++ b/llvm/test/MC/RISCV/xqci-fixups.s
@@ -0,0 +1,36 @@
+# RUN: llvm-mc -filetype=obj -triple riscv32  < %s \
+# RUN:     --mattr=+experimental-xqcili,+experimental-xqcilb,+experimental-xqcibi \
+# RUN:     -riscv-add-build-attributes \
+# RUN:     | llvm-objdump --no-print-imm-hex -M no-aliases -d - \
+# RUN:     | FileCheck -check-prefix=CHECK-INSTR %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 %s \
+# RUN:     --mattr=+experimental-xqcili,+experimental-xqcilb,+experimental-xqcibi \
+# RUN:     | llvm-readobj -r - | FileCheck %s -check-prefix=CHECK-REL
+
+## This checks that, if the assembler can resolve the qc fixup, that the fixup
+## is applied correctly to the instruction.
+
+.L0:
+# CHECK-INSTR: qc.e.beqi a0, 64, 0x0
+qc.e.beqi a0, 64, .L0
+# CHECK-INSTR: qc.e.j 0x10000016
+qc.e.j func
+# CHECK-INSTR: qc.e.li a0, 8
+qc.e.li a0, abs_sym
+# CHECK-INSTR: qc.li a0, 8
+qc.li a0, %qc.abs20(abs_sym)
+
+
+
+# This has to come after the instructions that use it or it will
+# be evaluated at parse-time (avoiding fixups)
+abs_sym = 8
+
+
+.space 0x10000000
+func:
+  ret
+
+## All these fixups should be resolved by the assembler without emitting
+## relocations.
+# CHECK-REL-NOT: R_RISCV
diff --git a/llvm/test/MC/RISCV/xqcibi-relocations.s b/llvm/test/MC/RISCV/xqcibi-relocations.s
index 0f7cc8c5787a1e..931cd7c9314bb8 100644
--- a/llvm/test/MC/RISCV/xqcibi-relocations.s
+++ b/llvm/test/MC/RISCV/xqcibi-relocations.s
@@ -84,7 +84,20 @@ qc.bnei t3, 14, same_section
 # OBJ-NEXT: qc.e.bgeui s2, 0x18, 0x28 <same_section>
 qc.e.bgeui s2, 24, same_section
 
-.option norelax
+## Enable compression/relaxation to check how symbols are handled.
+.option noexact
+
+# ASM: qc.bnei t1, 10, undef
+# OBJ: qc.beqi t1, 0xa, 0x42 <same_section_extern+0x16>
+# OBJ-NEXT: j 0x3e <same_section_extern+0x12>
+# OBJ-NEXT: R_RISCV_JAL undef{{$}}
+qc.bnei t1, 10, undef
+
+# ASM: qc.e.bgeui s0, 40, undef
+# OBJ-NEXT: qc.e.bltui s0, 0x28, 0x4c <same_section_extern+0x20>
+# OBJ-NEXT: j 0x48 <same_section_extern+0x1c>
+# OBJ-NEXT: R_RISCV_JAL undef{{$}}
+qc.e.bgeui s0, 40, undef
 
 .section .text.second, "ax", @progbits
 
diff --git a/llvm/test/MC/RISCV/xqcilb-relocations.s b/llvm/test/MC/RISCV/xqcilb-relocations.s
index ab08beed9be94d..48c8c6931c8af0 100644
--- a/llvm/test/MC/RISCV/xqcilb-relocations.s
+++ b/llvm/test/MC/RISCV/xqcilb-relocations.s
@@ -92,7 +92,22 @@ qc.e.j same_section
 # OBJ-NEXT: R_RISCV_RELAX
 qc.e.jal same_section
 
-.option norelax
+## Enable compression/relaxation to check how symbols are handled.
+.option noexact
+
+qc.e.j undef
+# ASM: j undef
+# OBJ: qc.e.j 0x44 <same_section_extern+0x10>
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
+
+qc.e.jal undef
+# ASM: jal undef
+# OBJ: qc.e.jal 0x4a <same_section_extern+0x16>
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM195 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
 
 .section .text.other, "ax", @progbits
 
diff --git a/llvm/test/MC/RISCV/xqcili-relocations.s b/llvm/test/MC/RISCV/xqcili-relocations.s
index 866586236fa46b..7eff61fc782d89 100644
--- a/llvm/test/MC/RISCV/xqcili-relocations.s
+++ b/llvm/test/MC/RISCV/xqcili-relocations.s
@@ -97,7 +97,22 @@ qc.li a1, %qc.abs20(undef)
 # OBJ-NEXT: R_RISCV_RELAX
 qc.e.li s1, undef
 
-.option norelax
+## Enable compression/relaxation to check how symbols are handled.
+.option noexact
+
+# ASM: qc.li a1, %qc.abs20(undef)
+# OBJ-NEXT: qc.li a1, 0x0
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM192 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
+qc.li a1, %qc.abs20(undef)
+
+# ASM: qc.e.li a2, undef
+# OBJ-NEXT: qc.e.li a2, 0x0
+# OBJ-NEXT: R_RISCV_VENDOR QUALCOMM{{$}}
+# OBJ-NEXT: R_RISCV_CUSTOM194 undef{{$}}
+# OBJ-NEXT: R_RISCV_RELAX
+qc.e.li a2, undef
 
 .section .text.other, "ax", @progbits
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/146763


More information about the llvm-commits mailing list