[llvm] [RISCV] Add big-endian support to RISC-V backend (PR #146534)

Djordje Todorovic via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 30 04:08:29 PDT 2025


https://github.com/djtodoro updated https://github.com/llvm/llvm-project/pull/146534

>From 2d6aa04463f79bbcfeef7f4d5c4d941430aa3e9f Mon Sep 17 00:00:00 2001
From: Djordje Todorovic <djordje.todorovic at htecgroup.com>
Date: Mon, 2 Jun 2025 14:07:16 +0200
Subject: [PATCH] [RISCV] Add initial assembler/MC layer support for big-endian

This patch adds basic assembler and MC layer infrastructure for
RISC-V big-endian targets (riscv32be/riscv64be):

  - Register big-endian targets in RISCVTargetMachine
  - Add big-endian data layout strings
  - Implement endianness-aware fixup application in assembler
    backend
  - Add byte swapping for data fixups on BE cores
  - Update MC layer components (AsmInfo, MCTargetDesc, Disassembler,
    AsmParser)
  - Handle BE targets in AddressSanitizer

This provides the foundation for BE support but does not yet include:
  - Codegen patterns for BE
  - Load/store instruction handling
  - BE-specific subtarget features
---
 llvm/cmake/config.guess                       |  2 +-
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  2 +
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp |  2 +
 .../RISCV/Disassembler/RISCVDisassembler.cpp  |  4 ++
 .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp    | 52 +++++++++++++++----
 .../RISCV/MCTargetDesc/RISCVAsmBackend.h      |  2 +-
 .../RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp     |  1 +
 .../RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp  |  3 +-
 llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp     |  2 +
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  | 30 ++++++-----
 .../RISCV/TargetInfo/RISCVTargetInfo.cpp      | 14 +++++
 .../Target/RISCV/TargetInfo/RISCVTargetInfo.h |  2 +
 .../Instrumentation/AddressSanitizer.cpp      |  3 +-
 13 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess
index 96cc554f181ab..27b55bd166edc 100644
--- a/llvm/cmake/config.guess
+++ b/llvm/cmake/config.guess
@@ -1003,7 +1003,7 @@ EOF
     ppcle:Linux:*:*)
 	echo powerpcle-unknown-linux-gnu
 	exit ;;
-    riscv32:Linux:*:* | riscv64:Linux:*:*)
+    riscv32:Linux:*:* | riscv64:Linux:*:* | riscv32be:Linux:*:* | riscv64be:Linux:*:*)
 	LIBC=gnu
 	eval $set_cc_for_build
 	# Do not check for __GLIBC__ because uclibc defines it too
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e9172f4acb0cb..94d39ad403d17 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -247,6 +247,8 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
     break;
   case Triple::riscv32:
   case Triple::riscv64:
+  case Triple::riscv32be:
+  case Triple::riscv64be:
     LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
     PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
                           dwarf::DW_EH_PE_sdata4;
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index d71c42c0a5fc1..c87362a1b4e87 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -4053,4 +4053,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVAsmParser() {
   RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
   RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
+  RegisterMCAsmParser<RISCVAsmParser> A(getTheRISCV32beTarget());
+  RegisterMCAsmParser<RISCVAsmParser> B(getTheRISCV64beTarget());
 }
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 67cc01e647a04..c535dc0891f9c 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -72,6 +72,10 @@ LLVMInitializeRISCVDisassembler() {
                                          createRISCVDisassembler);
   TargetRegistry::RegisterMCDisassembler(getTheRISCV64Target(),
                                          createRISCVDisassembler);
+  TargetRegistry::RegisterMCDisassembler(getTheRISCV32beTarget(),
+                                         createRISCVDisassembler);
+  TargetRegistry::RegisterMCDisassembler(getTheRISCV64beTarget(),
+                                         createRISCVDisassembler);
 }
 
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 82e3b5ceb4ef6..f2126ea02c6eb 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -33,9 +33,11 @@ static cl::opt<bool> ULEB128Reloc(
     cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate"));
 
 RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI,
-                                 bool Is64Bit, const MCTargetOptions &Options)
-    : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
-      Is64Bit(Is64Bit), TargetOptions(Options) {
+                                 bool Is64Bit, bool IsLittleEndian,
+                                 const MCTargetOptions &Options)
+    : MCAsmBackend(IsLittleEndian ? llvm::endianness::little
+                                  : llvm::endianness::big),
+      STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
   RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
 }
 
@@ -370,7 +372,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
     OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
     Offset = OS.tell();
     Fixup = RISCV::getRelocPairForSize(2);
-    support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint16_t>(OS, 0, Endian);
   }
 
   const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
@@ -426,15 +428,15 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const {
     AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6});
   } else if (isUInt<8>(Value)) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc1);
-    support::endian::write<uint8_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint8_t>(OS, 0, Endian);
     AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8});
   } else if (isUInt<16>(Value)) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc2);
-    support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint16_t>(OS, 0, Endian);
     AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16});
   } else if (isUInt<32>(Value)) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc4);
-    support::endian::write<uint32_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint32_t>(OS, 0, Endian);
     AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32});
   } else {
     llvm_unreachable("unsupported CFA encoding");
@@ -880,6 +882,34 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
   return false;
 }
 
+// Data should be swapped for big endian cores.
+static bool isDataFixup(unsigned Kind) {
+  switch (Kind) {
+  default:
+    llvm_unreachable("Unknown fixup kind!");
+
+  case FK_Data_1:
+  case FK_Data_2:
+  case FK_Data_4:
+  case FK_Data_8:
+    return true;
+
+  case RISCV::fixup_riscv_hi20:
+  case RISCV::fixup_riscv_lo12_i:
+  case RISCV::fixup_riscv_lo12_s:
+  case RISCV::fixup_riscv_pcrel_hi20:
+  case RISCV::fixup_riscv_pcrel_lo12_i:
+  case RISCV::fixup_riscv_pcrel_lo12_s:
+  case RISCV::fixup_riscv_jal:
+  case RISCV::fixup_riscv_branch:
+  case RISCV::fixup_riscv_call:
+  case RISCV::fixup_riscv_call_plt:
+  case RISCV::fixup_riscv_rvc_jump:
+  case RISCV::fixup_riscv_rvc_branch:
+    return false;
+  }
+}
+
 void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
                                  const MCValue &Target,
                                  MutableArrayRef<char> Data, uint64_t Value,
@@ -905,8 +935,11 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
 
   // For each byte of the fragment that the fixup touches, mask in the
   // bits from the fixup value.
+  // For big endian cores, data fixup should be swapped.
+  bool SwapValue = (Endian == llvm::endianness::big) && isDataFixup(Kind);
   for (unsigned i = 0; i != NumBytes; ++i) {
-    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+    unsigned Idx = SwapValue ? (NumBytes - 1 - i) : i;
+    Data[Offset + Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
   }
 }
 
@@ -921,5 +954,6 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
                                           const MCTargetOptions &Options) {
   const Triple &TT = STI.getTargetTriple();
   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
-  return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
+  return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), TT.isLittleEndian(),
+                             Options);
 }
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index d97d63204e7e4..80c1f200b09af 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -35,7 +35,7 @@ class RISCVAsmBackend : public MCAsmBackend {
 
 public:
   RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
-                  const MCTargetOptions &Options);
+                  bool IsLittleEndian, const MCTargetOptions &Options);
   ~RISCVAsmBackend() override = default;
 
   std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 090d331d99cab..77f65d814ce7a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
 void RISCVMCAsmInfo::anchor() {}
 
 RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
+  IsLittleEndian = TT.isLittleEndian();
   CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
   CommentString = "#";
   AlignmentIsInBytes = false;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 61ecfb278a7d3..d917ef4129791 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -376,7 +376,8 @@ static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
 
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVTargetMC() {
-  for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
+  for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target(),
+                    &getTheRISCV32beTarget(), &getTheRISCV64beTarget()}) {
     TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
     TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo);
     TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 2f32e2ac3eba3..83566b1c57782 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -611,6 +611,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVAsmPrinter() {
   RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target());
   RegisterAsmPrinter<RISCVAsmPrinter> Y(getTheRISCV64Target());
+  RegisterAsmPrinter<RISCVAsmPrinter> A(getTheRISCV32beTarget());
+  RegisterAsmPrinter<RISCVAsmPrinter> B(getTheRISCV64beTarget());
 }
 
 void RISCVAsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index da6ac2f6f31e9..3a16256aed462 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -117,6 +117,8 @@ static cl::opt<bool>
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+  RegisterTargetMachine<RISCVTargetMachine> A(getTheRISCV32beTarget());
+  RegisterTargetMachine<RISCVTargetMachine> B(getTheRISCV64beTarget());
   auto *PR = PassRegistry::getPassRegistry();
   initializeGlobalISel(*PR);
   initializeRISCVO0PreLegalizerCombinerPass(*PR);
@@ -150,21 +152,23 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVAsmPrinterPass(*PR);
 }
 
-static StringRef computeDataLayout(const Triple &TT,
-                                   const TargetOptions &Options) {
-  StringRef ABIName = Options.MCOptions.getABIName();
-  if (TT.isArch64Bit()) {
-    if (ABIName == "lp64e")
-      return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64";
+static std::string computeDataLayout(const Triple &TT,
+                                     const TargetOptions &Opts) {
+  const bool IsLittle = TT.isLittleEndian();
+  StringRef ABI = Opts.MCOptions.getABIName();
+  std::string DL;
 
-    return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+  if (TT.isArch64Bit()) {
+    DL = (Twine(IsLittle ? "e" : "E") + "-m:e-p:64:64-i64:64-i128:128-n32:64-" +
+          (ABI == "lp64e" ? "S64" : "S128"))
+             .str();
+  } else {
+    assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
+    DL = (Twine(IsLittle ? "e" : "E") + "-m:e-p:32:32-i64:64-n32-" +
+          (ABI == "ilp32e" ? "S32" : "S128"))
+             .str();
   }
-  assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
-
-  if (ABIName == "ilp32e")
-    return "e-m:e-p:32:32-i64:64-n32-S32";
-
-  return "e-m:e-p:32:32-i64:64-n32-S128";
+  return DL;
 }
 
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index fc0965d263a8a..7b0afe46971a3 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -21,10 +21,24 @@ Target &llvm::getTheRISCV64Target() {
   return TheRISCV64Target;
 }
 
+Target &llvm::getTheRISCV32beTarget() {
+  static Target TheRISCV32beTarget;
+  return TheRISCV32beTarget;
+}
+
+Target &llvm::getTheRISCV64beTarget() {
+  static Target TheRISCV64beTarget;
+  return TheRISCV64beTarget;
+}
+
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVTargetInfo() {
   RegisterTarget<Triple::riscv32, /*HasJIT=*/true> X(
       getTheRISCV32Target(), "riscv32", "32-bit RISC-V", "RISCV");
   RegisterTarget<Triple::riscv64, /*HasJIT=*/true> Y(
       getTheRISCV64Target(), "riscv64", "64-bit RISC-V", "RISCV");
+  RegisterTarget<Triple::riscv32be> A(getTheRISCV32beTarget(), "riscv32be",
+                                      "32-bit big endian RISC-V", "RISCV");
+  RegisterTarget<Triple::riscv64be> B(getTheRISCV64beTarget(), "riscv64be",
+                                      "64-bit big endian RISC-V", "RISCV");
 }
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
index ed00a01fa1a2a..9b9fd2cca2fd0 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
@@ -15,6 +15,8 @@ class Target;
 
 Target &getTheRISCV32Target();
 Target &getTheRISCV64Target();
+Target &getTheRISCV32beTarget();
+Target &getTheRISCV64beTarget();
 
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index e87bee79a6a69..2eb727fc2a596 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -496,7 +496,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
   bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 ||
                    TargetTriple.getArch() == Triple::aarch64_be;
   bool IsLoongArch64 = TargetTriple.isLoongArch64();
-  bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64;
+  bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64 ||
+                   TargetTriple.getArch() == Triple::riscv64be;
   bool IsWindows = TargetTriple.isOSWindows();
   bool IsFuchsia = TargetTriple.isOSFuchsia();
   bool IsAMDGPU = TargetTriple.isAMDGPU();



More information about the llvm-commits mailing list