[llvm-branch-commits] [llvm] [LLVM] Update CUDA ELF flags for their new ABI (PR #159451)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Sep 17 14:05:49 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-binary-utilities
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Backport of the fix that updates the CUDA ELF flags for the new CUDA ABI.
---
Full diff: https://github.com/llvm/llvm-project/pull/159451.diff
6 Files Affected:
- (modified) llvm/include/llvm/BinaryFormat/ELF.h (+25-3)
- (modified) llvm/include/llvm/Object/ELFObjectFile.h (+1)
- (modified) llvm/lib/Object/ELFObjectFile.cpp (+30-2)
- (modified) llvm/tools/llvm-readobj/ELFDumper.cpp (+82-32)
- (modified) offload/plugins-nextgen/common/src/Utils/ELF.cpp (+15-8)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+5-1)
``````````diff
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index ebb257ab33821..cfae75d093421 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -362,6 +362,7 @@ enum {
ELFOSABI_FENIXOS = 16, // FenixOS
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
+ ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture, V2 OS/ABI value.
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
+// CUDA OS ABI Version identification.
+enum {
+ ELFABIVERSION_CUDA_V1 = 7,
+ ELFABIVERSION_CUDA_V2 = 8,
+};
+
#define ELF_RELOC(name, value) name = value,
// X86_64 relocations.
@@ -921,9 +928,15 @@ enum {
// NVPTX specific e_flags.
enum : unsigned {
- // Processor selection mask for EF_CUDA_SM* values.
+ // Processor selection mask for EF_CUDA_SM* values prior to Blackwell.
EF_CUDA_SM = 0xff,
+ // Processor selection mask for EF_CUDA_SM* values following Blackwell.
+ EF_CUDA_SM_MASK = 0xff00,
+
+ // Bit offset (right-shift amount) applied after EF_CUDA_SM_MASK to recover the EF_CUDA_SM* value following Blackwell.
+ EF_CUDA_SM_OFFSET = 8,
+
// SM based processor values.
EF_CUDA_SM20 = 0x14,
EF_CUDA_SM21 = 0x15,
@@ -943,9 +956,15 @@ enum : unsigned {
EF_CUDA_SM80 = 0x50,
EF_CUDA_SM86 = 0x56,
EF_CUDA_SM87 = 0x57,
+ EF_CUDA_SM88 = 0x58,
EF_CUDA_SM89 = 0x59,
- // The sm_90a variant uses the same machine flag.
EF_CUDA_SM90 = 0x5a,
+ EF_CUDA_SM100 = 0x64,
+ EF_CUDA_SM101 = 0x65,
+ EF_CUDA_SM103 = 0x67,
+ EF_CUDA_SM110 = 0x6e,
+ EF_CUDA_SM120 = 0x78,
+ EF_CUDA_SM121 = 0x79,
// Unified texture binding is enabled.
EF_CUDA_TEXMODE_UNIFIED = 0x100,
@@ -954,12 +973,15 @@ enum : unsigned {
// The target is using 64-bit addressing.
EF_CUDA_64BIT_ADDRESS = 0x400,
// Set when using the sm_90a processor.
- EF_CUDA_ACCELERATORS = 0x800,
+ EF_CUDA_ACCELERATORS_V1 = 0x800,
// Undocumented software feature.
EF_CUDA_SW_FLAG_V2 = 0x1000,
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
EF_CUDA_VIRTUAL_SM = 0xff0000,
+
+ // Set when using an accelerator variant like sm_100a.
+ EF_CUDA_ACCELERATORS = 0x8,
};
// ELF Relocation types for BPF
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 103686884e705..30a9dd35f624e 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
case ELF::ELFOSABI_OPENBSD:
return Triple::OpenBSD;
case ELF::ELFOSABI_CUDA:
+ case ELF::ELFOSABI_CUDA_V2:
return Triple::CUDA;
case ELF::ELFOSABI_AMDGPU_HSA:
return Triple::AMDHSA;
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 0e13d32bbe522..a6b56ae77cf21 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -620,7 +620,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
assert(getEMachine() == ELF::EM_CUDA);
- unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
+ unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
+ ? getPlatformFlags() & ELF::EF_CUDA_SM
+ : (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
+ ELF::EF_CUDA_SM_OFFSET;
switch (SM) {
// Fermi architecture.
@@ -672,6 +675,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
return "sm_86";
case ELF::EF_CUDA_SM87:
return "sm_87";
+ case ELF::EF_CUDA_SM88:
+ return "sm_88";
// Ada architecture.
case ELF::EF_CUDA_SM89:
@@ -679,7 +684,30 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
// Hopper architecture.
case ELF::EF_CUDA_SM90:
- return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
+ : "sm_90";
+
+ // Blackwell architecture.
+ case ELF::EF_CUDA_SM100:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
+ : "sm_100";
+ case ELF::EF_CUDA_SM101:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_101a"
+ : "sm_101";
+ case ELF::EF_CUDA_SM103:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
+ : "sm_103";
+ case ELF::EF_CUDA_SM110:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
+ : "sm_110";
+
+ // Blackwell architecture.
+ case ELF::EF_CUDA_SM120:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
+ : "sm_120";
+ case ELF::EF_CUDA_SM121:
+ return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_121a"
+ : "sm_121";
default:
llvm_unreachable("Unknown EF_CUDA_SM value");
}
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 101079f09e1d2..3fd167df1ecc5 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
};
const EnumEntry<unsigned> ElfOSABI[] = {
- {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
- {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
- {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
- {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
- {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
- {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
- {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
- {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
- {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
- {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
- {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
- {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
- {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
- {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
- {"AROS", "AROS", ELF::ELFOSABI_AROS},
- {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
- {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
- {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
- {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
-};
+ {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
+ {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
+ {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
+ {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
+ {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
+ {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
+ {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
+ {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
+ {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
+ {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
+ {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
+ {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
+ {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
+ {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
+ {"AROS", "AROS", ELF::ELFOSABI_AROS},
+ {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
+ {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
+ {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
+ {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
+ {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
@@ -1666,16 +1666,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
};
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
- ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
- ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
- ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
- ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
- ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
- ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
- ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
- ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
- ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
- ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
+ ENUM_ENT(EF_CUDA_SM20, "sm_20"),
+ ENUM_ENT(EF_CUDA_SM21, "sm_21"),
+ ENUM_ENT(EF_CUDA_SM30, "sm_30"),
+ ENUM_ENT(EF_CUDA_SM32, "sm_32"),
+ ENUM_ENT(EF_CUDA_SM35, "sm_35"),
+ ENUM_ENT(EF_CUDA_SM37, "sm_37"),
+ ENUM_ENT(EF_CUDA_SM50, "sm_50"),
+ ENUM_ENT(EF_CUDA_SM52, "sm_52"),
+ ENUM_ENT(EF_CUDA_SM53, "sm_53"),
+ ENUM_ENT(EF_CUDA_SM60, "sm_60"),
+ ENUM_ENT(EF_CUDA_SM61, "sm_61"),
+ ENUM_ENT(EF_CUDA_SM62, "sm_62"),
+ ENUM_ENT(EF_CUDA_SM70, "sm_70"),
+ ENUM_ENT(EF_CUDA_SM72, "sm_72"),
+ ENUM_ENT(EF_CUDA_SM75, "sm_75"),
+ ENUM_ENT(EF_CUDA_SM80, "sm_80"),
+ ENUM_ENT(EF_CUDA_SM86, "sm_86"),
+ ENUM_ENT(EF_CUDA_SM87, "sm_87"),
+ ENUM_ENT(EF_CUDA_SM88, "sm_88"),
+ ENUM_ENT(EF_CUDA_SM89, "sm_89"),
+ ENUM_ENT(EF_CUDA_SM90, "sm_90"),
+ ENUM_ENT(EF_CUDA_SM100, "sm_100"),
+ ENUM_ENT(EF_CUDA_SM101, "sm_101"),
+ ENUM_ENT(EF_CUDA_SM103, "sm_103"),
+ ENUM_ENT(EF_CUDA_SM110, "sm_110"),
+ ENUM_ENT(EF_CUDA_SM120, "sm_120"),
+ ENUM_ENT(EF_CUDA_SM121, "sm_121"),
+ ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
+ ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
+ ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
+ ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
+ ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
+ ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
+ ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
+ ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
+ ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
+ ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
+ ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
+ ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
+ ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
+ ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
+ ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
+ ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
+ ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
+ ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
+ ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
+ ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
+ ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
+ ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
+ ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
+ ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
+ ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
+ ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
+ ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
};
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3650,10 +3694,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
else if (e.e_machine == EM_XTENSA)
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
unsigned(ELF::EF_XTENSA_MACH));
- else if (e.e_machine == EM_CUDA)
+ else if (e.e_machine == EM_CUDA) {
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
unsigned(ELF::EF_CUDA_SM));
- else if (e.e_machine == EM_AMDGPU) {
+ if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
+ (e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
+ ElfFlags += "a";
+ else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
+ (e.e_flags & ELF::EF_CUDA_ACCELERATORS))
+ ElfFlags += "a";
+ } else if (e.e_machine == EM_AMDGPU) {
switch (e.e_ident[ELF::EI_ABIVERSION]) {
default:
break;
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index dfec55432f202..b0ee1984c42ce 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -60,23 +60,30 @@ static Expected<bool>
checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
const auto Header = ELFObj.getELFFile().getHeader();
if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
- return createError("Only executable ELF files are supported");
+ return createError("only executable ELF files are supported");
if (Header.e_machine == EM_AMDGPU) {
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
- return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
+ return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
- return createError("Invalid AMD ABI version, must be version 5 or above");
+ return createError("invalid AMD ABI version, must be version 5 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) >
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
- return createError("Unsupported AMDGPU architecture");
+ return createError("unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
- if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
- return createError("Invalid CUDA addressing mode");
- if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
- return createError("Unsupported NVPTX architecture");
+ if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
+ if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
+ return createError("invalid CUDA addressing mode");
+ if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
+ return createError("unsupported NVPTX architecture");
+ } else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
+ if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)
+ return createError("unsupported NVPTX architecture");
+ } else {
+ return createError("invalid CUDA ABI version");
+ }
}
return Header.e_machine == EMachine;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b787376eb1770..71a28fadfd81d 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
return ElfOrErr.takeError();
// Get the numeric value for the image's `sm_` value.
- auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
+ const auto Header = ElfOrErr->getELFFile().getHeader();
+ unsigned SM =
+ Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
+ ? Header.e_flags & ELF::EF_CUDA_SM
+ : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;
CUdevice Device;
CUresult Res = cuDeviceGet(&Device, DeviceId);
``````````
</details>
https://github.com/llvm/llvm-project/pull/159451
More information about the llvm-branch-commits
mailing list