https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/179701

>From 7713b806be79a6e15aab1dcfb9a8b6f8b96db579 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <[email protected]>
Date: Wed, 4 Feb 2026 09:50:54 -0500
Subject: [PATCH] [Driver] Enable -ftime-trace for CUDA/HIP device compilation

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
handleTimeTrace() had no offloading context.

This patch enables -ftime-trace for offload device compilation by having
handleTimeTrace() derive the offloading prefix internally from the
JobAction, ToolChain, and BoundArch, following the same pattern as
GetOffloadingFileNamePrefix used for output filenames. The bound
architecture is appended to ensure unique trace files per offload target.
For device compilation, the trace output directory is derived from the
-o option since the device output is a temp file.

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.
---
 clang/lib/Driver/Driver.cpp       | 64 ++++++++++++++++++++++++++-----
 clang/test/Driver/ftime-trace.cpp | 35 +++++++++++++++++
 2 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index a55c5033b57cf..67d9e11bc420d 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5842,27 +5842,70 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
                             const JobAction *JA, const char *BaseInput,
-                            const InputInfo &Result) {
+                            const InputInfo &Result, const ToolChain *TC,
+                            StringRef BoundArch, bool AtTopLevel) {
   Arg *A =
       Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
     return;
+
+  // Build the offloading prefix from the job action's offloading context.
+  // For device actions, this produces e.g. "-hip-amdgcn-amd-amdhsa-gfx906".
+  // For host actions that are not at top level (e.g. --save-temps with
+  // offloading), this produces e.g. "-host-x86_64-unknown-linux-gnu".
+  // For top-level host actions, no prefix is generated.
+  std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+      JA->getOffloadingDeviceKind(),
+      TC ? TC->getTriple().normalize() : "",
+      /*CreatePrefixForHost=*/
+          !(JA->getOffloadingHostActiveKinds() == Action::OFK_None ||
+            AtTopLevel));
+  if (!OffloadingPrefix.empty() && !BoundArch.empty()) {
+    OffloadingPrefix += "-";
+    OffloadingPrefix += BoundArch;
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
     Path = A->getValue();
     if (llvm::sys::fs::is_directory(Path)) {
-      SmallString<128> Tmp(Result.getFilename());
-      llvm::sys::path::replace_extension(Tmp, "json");
-      llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+      // When -ftime-trace=<dir> and it's a directory:
+      // - For host/non-offload: use the output filename stem
+      // - For offload: use input filename stem + offloading prefix
+      SmallString<128> Tmp;
+      if (OffloadingPrefix.empty()) {
+        Tmp = llvm::sys::path::stem(Result.getFilename());
+      } else {
+        Tmp = llvm::sys::path::stem(BaseInput);
+        Tmp += OffloadingPrefix;
+      }
+      Tmp += ".json";
+      llvm::sys::path::append(Path, Tmp);
     }
   } else {
     if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-      // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
-      // end with a path separator.
+      // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+      // that dumpdir may not end with a path separator.
       Path = DumpDir->getValue();
-      Path += llvm::sys::path::filename(BaseInput);
+      Path += llvm::sys::path::stem(BaseInput);
+      Path += OffloadingPrefix;
+    } else if (!OffloadingPrefix.empty()) {
+      // For offloading, derive path from -o option or use current directory.
+      // The Result filename may be a temp file, so we use the -o output
+      // directory combined with the input filename and offload prefix.
+      if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) {
+        Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+        if (!Path.empty())
+          Path += llvm::sys::path::get_separator();
+      }
+      Path += llvm::sys::path::stem(BaseInput);
+      Path += OffloadingPrefix;
     } else {
-      Path = Result.getFilename();
+      // Use the output filename stem for the trace file.
+      Path = llvm::sys::path::parent_path(Result.getFilename());
+      if (!Path.empty())
+        Path += llvm::sys::path::get_separator();
+      Path += llvm::sys::path::stem(Result.getFilename());
     }
     llvm::sys::path::replace_extension(Path, "json");
   }
@@ -6121,8 +6164,9 @@ InputInfoList Driver::BuildJobsForActionNoCache(
                                              AtTopLevel, MultipleArchs,
                                              OffloadingPrefix),
                        BaseInput);
-    if (T->canEmitIR() && OffloadingPrefix.empty())
-      handleTimeTrace(C, Args, JA, BaseInput, Result);
+    if (T->canEmitIR())
+      handleTimeTrace(C, Args, JA, BaseInput, Result, TC, BoundArch,
+                      AtTopLevel);
   }
 
   if (CCCPrintBindings && !CCGenDiagnostics) {
diff --git a/clang/test/Driver/ftime-trace.cpp b/clang/test/Driver/ftime-trace.cpp
index 60c5885704b58..05edf671dfe2d 100644
--- a/clang/test/Driver/ftime-trace.cpp
+++ b/clang/test/Driver/ftime-trace.cpp
@@ -63,6 +63,41 @@
 // UNUSED-NEXT: warning: argument unused during compilation: '-ftime-trace-verbose'
 // UNUSED-NOT:  warning:
 
+/// Test HIP offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+
+/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory.
+// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-DIR
+// HIP-DIR: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-DIR: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json"
+
+/// Test HIP offloading with --save-temps: both host and device get unique trace files.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-SAVE-TEMPS
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a-host-x86_64-unknown-linux-gnu.json"
+
+/// Test CUDA offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CUDA
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_70.json"
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_80.json"
+// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
 template <typename T>
 struct Struct {
   T Num;

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to