[clang] a63a548 - [Driver] Enable -ftime-trace for CUDA/HIP device compilation (#179701)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 13 20:16:45 PDT 2026
Author: Yaxun (Sam) Liu
Date: 2026-03-13T23:16:41-04:00
New Revision: a63a548b5ba6eeb3adf55eefcfa08ec742af8550
URL: https://github.com/llvm/llvm-project/commit/a63a548b5ba6eeb3adf55eefcfa08ec742af8550
DIFF: https://github.com/llvm/llvm-project/commit/a63a548b5ba6eeb3adf55eefcfa08ec742af8550.diff
LOG: [Driver] Enable -ftime-trace for CUDA/HIP device compilation (#179701)
Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
the OffloadingPrefix was non-empty, causing handleTimeTrace() to be
skipped.
This patch enables -ftime-trace for offload device compilation by:
1. Computing the offloading prefix inside handleTimeTrace() from the job action, and no longer skipping the call when the prefix is non-empty
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device
compilation (since the device output is a temp file)
Trace files are now generated for each offload target:
- Host: output.json
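- Device: input-hip-amdgcn-amd-amdhsa-gfx906.json (named after the input file's stem plus the offload prefix and arch; by default placed in the directory of the -o output)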
Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.
This addresses a long-standing limitation noted in D150282.
Added:
clang/test/Driver/ftime-trace-offload.cpp
Modified:
clang/lib/Driver/Driver.cpp
Removed:
################################################################################
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 1a80aedfd8f0a..b7f65b7b74401 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5904,20 +5904,50 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
if (!A)
return;
+
+ SmallString<64> OffloadingPrefix;
+ if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+ const ToolChain *TC = JA->getOffloadingToolChain();
+ OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+ JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "",
+ /*CreatePrefixForHost=*/false);
+ if (const char *Arch = JA->getOffloadingArch()) {
+ OffloadingPrefix += "-";
+ OffloadingPrefix += Arch;
+ }
+ } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+ C.getDriver().isSaveTempsEnabled()) {
+ OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+ Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(),
+ /*CreatePrefixForHost=*/true);
+ }
+
SmallString<128> Path;
if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
Path = A->getValue();
if (llvm::sys::fs::is_directory(Path)) {
- SmallString<128> Tmp(Result.getFilename());
- llvm::sys::path::replace_extension(Tmp, "json");
- llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+ SmallString<128> Tmp(OffloadingPrefix.empty()
+ ? llvm::sys::path::stem(Result.getFilename())
+ : llvm::sys::path::stem(BaseInput));
+ Tmp += OffloadingPrefix;
+ Tmp += ".json";
+ llvm::sys::path::append(Path, Tmp);
}
} else {
if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
- // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
- // end with a path separator.
+ // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+ // that dumpdir may not end with a path separator.
Path = DumpDir->getValue();
- Path += llvm::sys::path::filename(BaseInput);
+ Path += llvm::sys::path::stem(BaseInput);
+ Path += OffloadingPrefix;
+ } else if (!OffloadingPrefix.empty()) {
+ // For offloading, derive path from -o output directory combined with
+ // the input filename and offload prefix.
+ SmallString<128> TraceName(llvm::sys::path::stem(BaseInput));
+ TraceName += OffloadingPrefix;
+ if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
+ Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+ llvm::sys::path::append(Path, TraceName);
} else {
Path = Result.getFilename();
}
@@ -6178,7 +6208,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
AtTopLevel, MultipleArchs,
OffloadingPrefix),
BaseInput);
- if (T->canEmitIR() && OffloadingPrefix.empty())
+ if (T->canEmitIR())
handleTimeTrace(C, Args, JA, BaseInput, Result);
}
diff --git a/clang/test/Driver/ftime-trace-offload.cpp b/clang/test/Driver/ftime-trace-offload.cpp
new file mode 100644
index 0000000000000..2305d7bf4e04d
--- /dev/null
+++ b/clang/test/Driver/ftime-trace-offload.cpp
@@ -0,0 +1,37 @@
+// RUN: rm -rf %t && mkdir -p %t && cd %t
+// RUN: mkdir d e f && cp %s d/a.cpp
+
+/// Test HIP offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN: | FileCheck %s --check-prefix=HIP
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \
+// RUN: | FileCheck %s --check-prefix=HIP
+
+/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory.
+// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN: | FileCheck %s --check-prefix=HIP-DIR
+// HIP-DIR: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-DIR: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json"
+
+/// Test HIP offloading with --save-temps: both host and device get unique trace files.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \
+// RUN: | FileCheck %s --check-prefix=HIP-SAVE-TEMPS
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a-host-x86_64-unknown-linux-gnu.json"
+
+/// Test CUDA offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \
+// RUN: -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CUDA
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_70.json"
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_80.json"
+// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json"
More information about the cfe-commits
mailing list