[clang] 12eab1a - [CUDA] Use --image3 to construct fat binary (#151760)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 1 13:14:17 PDT 2025
Author: Artem Belevich
Date: 2025-08-01T13:14:14-07:00
New Revision: 12eab1a7b82d8db0e781862902b674332298162f
URL: https://github.com/llvm/llvm-project/commit/12eab1a7b82d8db0e781862902b674332298162f
DIFF: https://github.com/llvm/llvm-project/commit/12eab1a7b82d8db0e781862902b674332298162f.diff
LOG: [CUDA] Use --image3 to construct fat binary (#151760)
CUDA-12.9 has removed fatbinary tool's `--image` argument we've been
using till now.
--image3 has been supported since cuda-9, so we do not need CUDA SDK
version checks.
Added:
Modified:
clang/lib/Driver/ToolChains/Cuda.cpp
clang/test/Driver/cuda-arch-translation.cu
clang/test/Driver/cuda-options.cu
Removed:
################################################################################
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 1f0b478c02b25..fdfcea852b4f2 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -549,22 +549,16 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
auto *A = II.getAction();
assert(A->getInputs().size() == 1 &&
"Device offload action is expected to have a single input");
- const char *gpu_arch_str = A->getOffloadingArch();
- assert(gpu_arch_str &&
+ StringRef GpuArch = A->getOffloadingArch();
+ assert(!GpuArch.empty() &&
"Device action expected to have associated a GPU architecture!");
- OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);
- if (II.getType() == types::TY_PP_Asm &&
- !shouldIncludePTX(Args, gpu_arch_str))
+ if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch))
continue;
- // We need to pass an Arch of the form "sm_XX" for cubin files and
- // "compute_XX" for ptx.
- const char *Arch = (II.getType() == types::TY_PP_Asm)
- ? OffloadArchToVirtualArchString(gpu_arch)
- : gpu_arch_str;
- CmdArgs.push_back(
- Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
- ",file=" + getToolChain().getInputFilename(II)));
+ StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf";
+ CmdArgs.push_back(Args.MakeArgString(
+ "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) +
+ ",file=" + getToolChain().getInputFilename(II)));
}
for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index e4f83740a92eb..b4a521df2e6f5 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -68,19 +68,19 @@
// HIP: clang-offload-bundler
-// SM20:--image=profile=sm_20{{.*}}
-// SM21:--image=profile=sm_21{{.*}}
-// SM30:--image=profile=sm_30{{.*}}
-// SM32:--image=profile=sm_32{{.*}}
-// SM35:--image=profile=sm_35{{.*}}
-// SM37:--image=profile=sm_37{{.*}}
-// SM50:--image=profile=sm_50{{.*}}
-// SM52:--image=profile=sm_52{{.*}}
-// SM53:--image=profile=sm_53{{.*}}
-// SM60:--image=profile=sm_60{{.*}}
-// SM61:--image=profile=sm_61{{.*}}
-// SM62:--image=profile=sm_62{{.*}}
-// SM70:--image=profile=sm_70{{.*}}
+// SM20:--image3=kind=elf,sm=20{{.*}}
+// SM21:--image3=kind=elf,sm=21{{.*}}
+// SM30:--image3=kind=elf,sm=30{{.*}}
+// SM32:--image3=kind=elf,sm=32{{.*}}
+// SM35:--image3=kind=elf,sm=35{{.*}}
+// SM37:--image3=kind=elf,sm=37{{.*}}
+// SM50:--image3=kind=elf,sm=50{{.*}}
+// SM52:--image3=kind=elf,sm=52{{.*}}
+// SM53:--image3=kind=elf,sm=53{{.*}}
+// SM60:--image3=kind=elf,sm=60{{.*}}
+// SM61:--image3=kind=elf,sm=61{{.*}}
+// SM62:--image3=kind=elf,sm=62{{.*}}
+// SM70:--image3=kind=elf,sm=70{{.*}}
// GFX600:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx600
// GFX601:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx601
// GFX602:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index db6536ca9e03b..fc8e83a2bb279 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -243,10 +243,10 @@
// INCLUDES-DEVICE:fatbinary
// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
-// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
-// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
-// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
-// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
+// INCLUDES-DEVICE-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE]]"
+// INCLUDES-DEVICE-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE]]"
+// INCLUDES-DEVICE2-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE2]]"
+// INCLUDES-DEVICE2-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE2]]"
// Match host-side preprocessor job with -save-temps.
// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
@@ -288,9 +288,9 @@
// FATBIN-COMMON:fatbinary
// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
-// FATBIN-COMMON: "--image=profile=sm_52,file=
-// PTX-SM52: "--image=profile=compute_52,file=
-// NOPTX-SM52-NOT: "--image=profile=compute_52,file=
-// FATBIN-COMMON: "--image=profile=sm_60,file=
-// PTX-SM60: "--image=profile=compute_60,file=
-// NOPTX-SM60-NOT: "--image=profile=compute_60,file=
+// FATBIN-COMMON: "--image3=kind=elf,sm=52,file=
+// PTX-SM52: "--image3=kind=ptx,sm=52,file=
+// NOPTX-SM52-NOT: "--image3=kind=ptx,sm=52,file=
+// FATBIN-COMMON: "--image3=kind=elf,sm=60,file=
+// PTX-SM60: "--image3=kind=ptx,sm=60,file=
+// NOPTX-SM60-NOT: "--image3=kind=ptx,sm=60,file=
More information about the cfe-commits
mailing list