[llvm] [Offload] Add support for measuring elapsed time between events (PR #186856)

Tue Mar 31 06:48:21 PDT 2026

================
@@ -714,27 +717,49 @@ using AMDGPUSignalManagerTy = GenericDeviceResourceManagerTy<AMDGPUSignalRef>;
 /// Class holding an HSA queue to submit kernel and barrier packets.
 struct AMDGPUQueueTy {
   /// Create an empty queue.
-  AMDGPUQueueTy() : Queue(nullptr), Mutex(), NumUsers(0) {}
+  AMDGPUQueueTy()
+      : Queue(nullptr), ProfilingEnabled(false), Mutex(), NumUsers(0) {}
 
   /// Lazily initialize a new queue belonging to a specific agent.
   Error init(GenericDeviceTy &Device, hsa_agent_t Agent, int32_t QueueSize) {
     if (Queue)
       return Plugin::success();
+
     hsa_status_t Status =
         hsa_queue_create(Agent, QueueSize, HSA_QUEUE_TYPE_MULTI, callbackError,
                          &Device, UINT32_MAX, UINT32_MAX, &Queue);
-    return Plugin::check(Status, "error in hsa_queue_create: %s");
+    if (auto Err = Plugin::check(Status, "error in hsa_queue_create: %s"))
+      return Err;
+
+    // Keep queue profiling enabled from creation time onward, as HIP/ROCclr
+    // does. Elapsed-time queries rely on queue-level hardware profiling support
+    // to retrieve packet timing.
+    //
+    // If enabling profiling fails, leave the queue usable and preserve existing
+    // event behavior; only elapsed-time queries will later report that timing
+    // is unavailable.
+    Status = hsa_amd_profiling_set_profiler_enabled(Queue, 1);
----------------
leandrolcampos wrote:

Done, thanks.

https://github.com/llvm/llvm-project/pull/186856