[llvm] [LLVM] Update CUDA ELF flags for their new ABI (PR #149534)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 18 08:42:17 PDT 2025
https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/149534
Summary:
We rely on these flags to do things in the runtime and print the
contents of binaries correctly. CUDA updated their ABI encoding recently
and we didn't handle that. it's a new ABI entirely so we just select on
it when it shows up.
Fixes: https://github.com/llvm/llvm-project/issues/148703
>From 421d233ed86c5b87ed4dc5e8c49d0745da4b226c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 18 Jul 2025 10:25:46 -0500
Subject: [PATCH] [LLVM] Update CUDA ELF flags for their new ABI
Summary:
We rely on these flags to do things in the runtime and print the
contents of binaries correctly. CUDA updated their ABI encoding recently
and we didn't handle that. it's a new ABI entirely so we just select on
it when it shows up.
Fixes: https://github.com/llvm/llvm-project/issues/148703
---
llvm/include/llvm/BinaryFormat/ELF.h | 21 +++++-
llvm/include/llvm/Object/ELFObjectFile.h | 1 +
llvm/lib/BinaryFormat/ELF.cpp | 2 +
llvm/lib/Object/ELFObjectFile.cpp | 17 ++++-
llvm/tools/llvm-readobj/ELFDumper.cpp | 71 ++++++++++---------
llvm/unittests/Object/ELFObjectFileTest.cpp | 3 +-
.../plugins-nextgen/common/src/Utils/ELF.cpp | 15 ++--
offload/plugins-nextgen/cuda/src/rtl.cpp | 6 +-
8 files changed, 94 insertions(+), 42 deletions(-)
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6bf2e177b5d40..2b4ad1c775bb6 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -362,6 +362,7 @@ enum {
ELFOSABI_FENIXOS = 16, // FenixOS
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
+ ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
+// CUDA OS ABI Version identification.
+enum {
+ ELFABIVERSION_CUDA_V1 = 7,
+ ELFABIVERSION_CUDA_V2 = 8,
+};
+
#define ELF_RELOC(name, value) name = value,
// X86_64 relocations.
@@ -921,7 +928,7 @@ enum {
// NVPTX specific e_flags.
enum : unsigned {
- // Processor selection mask for EF_CUDA_SM* values.
+ // Processor selection mask for EF_CUDA_SM* values prior to blackwell.
EF_CUDA_SM = 0xff,
// SM based processor values.
@@ -954,12 +961,22 @@ enum : unsigned {
// The target is using 64-bit addressing.
EF_CUDA_64BIT_ADDRESS = 0x400,
// Set when using the sm_90a processor.
- EF_CUDA_ACCELERATORS = 0x800,
+ EF_CUDA_ACCELERATORS_V1 = 0x800,
// Undocumented software feature.
EF_CUDA_SW_FLAG_V2 = 0x1000,
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,
+
+ // Processor selection mask for EF_CUDA_SM* values following blackwell.
+ EF_CUDA_SM_MASK = 0xff00,
+
+ // SM based processor values.
+ EF_CUDA_SM100 = 0x6400,
+ EF_CUDA_SM120 = 0x7800,
+
+ // Set when using an accelerator variant like sm_100a.
+ EF_CUDA_ACCELERATORS = 0x8,
};
// ELF Relocation types for BPF
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index a3aa0d9c137a2..ced1afdd4cc6a 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
case ELF::ELFOSABI_OPENBSD:
return Triple::OpenBSD;
case ELF::ELFOSABI_CUDA:
+ case ELF::ELFOSABI_CUDA_V2:
return Triple::CUDA;
case ELF::ELFOSABI_AMDGPU_HSA:
return Triple::AMDHSA;
diff --git a/llvm/lib/BinaryFormat/ELF.cpp b/llvm/lib/BinaryFormat/ELF.cpp
index 0ad1a09429e7c..4c96deb0655e2 100644
--- a/llvm/lib/BinaryFormat/ELF.cpp
+++ b/llvm/lib/BinaryFormat/ELF.cpp
@@ -652,6 +652,7 @@ uint8_t ELF::convertNameToOSABI(StringRef Name) {
.StartsWith("fenixos", ELFOSABI_FENIXOS)
.StartsWith("cloudabi", ELFOSABI_CLOUDABI)
.StartsWith("cuda", ELFOSABI_CUDA)
+ .StartsWith("cuda", ELFOSABI_CUDA_V2)
.StartsWith("amdhsa", ELFOSABI_AMDGPU_HSA)
.StartsWith("amdpal", ELFOSABI_AMDGPU_PAL)
.StartsWith("mesa3d", ELFOSABI_AMDGPU_MESA3D)
@@ -696,6 +697,7 @@ StringRef ELF::convertOSABIToName(uint8_t OSABI) {
case ELFOSABI_CLOUDABI:
return "cloudabi";
case ELFOSABI_CUDA:
+ case ELFOSABI_CUDA_V2:
return "cuda";
case ELFOSABI_AMDGPU_HSA:
return "amdhsa";
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 5597d7db6426d..0919c6aad74f2 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -620,7 +620,9 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
assert(getEMachine() == ELF::EM_CUDA);
- unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
+ unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
+ ? getPlatformFlags() & ELF::EF_CUDA_SM
+ : getPlatformFlags() & ELF::EF_CUDA_SM_MASK;
switch (SM) {
// Fermi architecture.
@@ -679,7 +681,18 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
// Hopper architecture.
case ELF::EF_CUDA_SM90:
- return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
+ : "sm_90";
+
+ // Blackwell architecture.
+ case ELF::EF_CUDA_SM100:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
+ : "sm_100";
+
+ // Rubin architecture.
+ case ELF::EF_CUDA_SM120:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
+ : "sm_120";
default:
llvm_unreachable("Unknown EF_CUDA_SM value");
}
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 465c189680cae..10431c6afdb45 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
};
const EnumEntry<unsigned> ElfOSABI[] = {
- {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
- {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
- {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
- {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
- {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
- {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
- {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
- {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
- {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
- {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
- {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
- {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
- {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
- {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
- {"AROS", "AROS", ELF::ELFOSABI_AROS},
- {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
- {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
- {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
- {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
-};
+ {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
+ {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
+ {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
+ {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
+ {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
+ {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
+ {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
+ {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
+ {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
+ {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
+ {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
+ {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
+ {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
+ {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
+ {"AROS", "AROS", ELF::ELFOSABI_AROS},
+ {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
+ {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
+ {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
+ {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
+ {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
@@ -1667,16 +1667,17 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
};
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
- ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
- ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
- ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
- ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
- ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
- ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
- ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
- ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
- ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
- ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
+ ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
+ ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
+ ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
+ ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
+ ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
+ ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
+ ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
+ ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
+ ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
+ ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
+ ENUM_ENT(EF_CUDA_SM100, "sm_100"), ENUM_ENT(EF_CUDA_SM120, "sm_120"),
};
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3651,10 +3652,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
else if (e.e_machine == EM_XTENSA)
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
unsigned(ELF::EF_XTENSA_MACH));
- else if (e.e_machine == EM_CUDA)
+ else if (e.e_machine == EM_CUDA) {
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
unsigned(ELF::EF_CUDA_SM));
- else if (e.e_machine == EM_AMDGPU) {
+ if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
+ (e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
+ ElfFlags += "a";
+ else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
+ (e.e_flags & ELF::EF_CUDA_ACCELERATORS))
+ ElfFlags += "a";
+ } else if (e.e_machine == EM_AMDGPU) {
switch (e.e_ident[ELF::EI_ABIVERSION]) {
default:
break;
diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp
index 25b390758f172..7f933dac18689 100644
--- a/llvm/unittests/Object/ELFObjectFileTest.cpp
+++ b/llvm/unittests/Object/ELFObjectFileTest.cpp
@@ -295,7 +295,8 @@ TEST(ELFObjectFileTest, CheckOSAndTriple) {
{ELF::EM_X86_64, ELF::ELFOSABI_AIX, "x86_64--aix"},
{ELF::EM_X86_64, ELF::ELFOSABI_FREEBSD, "x86_64--freebsd"},
{ELF::EM_X86_64, ELF::ELFOSABI_OPENBSD, "x86_64--openbsd"},
- {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"}};
+ {ELF::EM_CUDA, ELF::ELFOSABI_CUDA, "nvptx64-nvidia-cuda"},
+ {ELF::EM_CUDA, ELF::ELFOSABI_CUDA_V2, "nvptx64-nvidia-cuda"}};
for (auto [Machine, OS, Triple] : Formats) {
const DataForTest D(ELF::ELFCLASS64, ELF::ELFDATA2LSB, Machine, OS,
ELF::EF_AMDGPU_MACH_AMDGCN_LAST);
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index dfec55432f202..d97b6c630929c 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -73,10 +73,17 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
return createError("Unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
- if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
- return createError("Invalid CUDA addressing mode");
- if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
- return createError("Unsupported NVPTX architecture");
+ if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V1) {
+ if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
+ return createError("Invalid CUDA addressing mode");
+ if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
+ return createError("Unsupported NVPTX architecture");
+ } else if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_CUDA_V2) {
+ if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM100)
+ return createError("Unsupported NVPTX architecture");
+ } else {
+ return createError("Invalid CUDA ABI version");
+ }
}
return Header.e_machine == EMachine;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b787376eb1770..0f7e408436289 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
return ElfOrErr.takeError();
// Get the numeric value for the image's `sm_` value.
- auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
+ const auto Header = ElfOrErr->getELFFile().getHeader();
+ unsigned SM =
+ Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
+ ? Header.e_flags & ELF::EF_CUDA_SM
+ : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> 4;
CUdevice Device;
CUresult Res = cuDeviceGet(&Device, DeviceId);
More information about the llvm-commits
mailing list