[Mlir-commits] [llvm] [mlir] [MLIR][NVVM] Add globaltimer_lo support in NVVM Dialect and NVPTX backend (PR #154672)

Dharuni R Acharya llvmlistbot at llvm.org
Wed Aug 20 22:33:26 PDT 2025


https://github.com/DharuniRAcharya created https://github.com/llvm/llvm-project/pull/154672

This patch adds support for reading the 32-bit `%globaltimer_lo` special register in the NVVM dialect and the NVPTX backend.
It adds the `NVVM_GlobalTimerLoOp` operation to the NVVM dialect and the `int_nvvm_read_ptx_sreg_globaltimer_lo` intrinsic to LLVM, together with its lowering in the NVPTX backend.
Lit tests covering the NVPTX lowering and the MLIR-to-LLVM-IR translation are included.

>From 5edc271751743970ddeb4a67e9cf5469d3b6be13 Mon Sep 17 00:00:00 2001
From: Dharuni R Acharya <dharunira at nvidia.com>
Date: Thu, 21 Aug 2025 04:59:19 +0000
Subject: [PATCH] [MLIR][NVPTX] Expose globaltimer_lo in NVVM Dialect and Add
 corresponding intrinsics in NVPTX backend

---
 llvm/include/llvm/IR/IntrinsicsNVVM.td      |  1 +
 llvm/lib/Target/NVPTX/NVPTXIntrinsics.td    |  2 +
 llvm/test/CodeGen/NVPTX/intrinsics.ll       | 17 ++++
 mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td |  1 +
 mlir/test/Target/LLVMIR/nvvmir.mlir         | 90 +++++++++++----------
 5 files changed, 67 insertions(+), 44 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 1bcc442a3f77f..49eaf9770b717 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1792,6 +1792,7 @@ def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32;
 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64;
 
 def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64;
+def int_nvvm_read_ptx_sreg_globaltimer_lo : PTXReadNCSRegIntrinsic_r32;
 
 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32;
 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 721afae4db51c..c280b68d2298d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -4358,10 +4358,12 @@ let hasSideEffects = 1 in {
   def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
   def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
   def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+  def SREG_GLOBALTIMER_LO : PTX_READ_SREG_R32<"globaltimer_lo", int_nvvm_read_ptx_sreg_globaltimer_lo>;
 }
 
 def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>;
 def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>;
+def: Pat <(i32 (readsteadycounter)), (SREG_GLOBALTIMER_LO)>;
 
 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4ed50632251cb..6bdb8ead7a64a 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -267,6 +267,23 @@ define i64 @test_globaltimer() {
   ret i64 %ret
 }
 
+define i32 @test_globaltimer_lo(){
+; CHECK-LABEL: test_globaltimer_lo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u32 %r1, %globaltimer_lo;
+; CHECK-NEXT:    mov.u32 %r2, %globaltimer_lo;
+; CHECK-NEXT:    add.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %ret = add i32 %a, %b
+  ret i32 %ret
+}
+
 define i64 @test_cyclecounter() {
 ; CHECK-LABEL: test_cyclecounter(
 ; CHECK:       {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index f9cd58de8915f..a3c440be9489a 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -258,6 +258,7 @@ def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster
 def NVVM_ClockOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock">;
 def NVVM_Clock64Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock64">;
 def NVVM_GlobalTimerOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer">;
+def NVVM_GlobalTimerLoOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer.lo">;
 
 //===----------------------------------------------------------------------===//
 // envreg registers
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index c8ba91efbff4d..22e6b648068a2 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -64,92 +64,94 @@ llvm.func @nvvm_special_regs() -> i32 {
   %30 = nvvm.read.ptx.sreg.clock64 : i64
   // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer
   %31 = nvvm.read.ptx.sreg.globaltimer : i64
-  // CHECK: %32 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-  %32 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %32 = nvvm.read.ptx.sreg.globaltimer.lo : i32
+  // CHECK: %33 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %33 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid
-  %33 = nvvm.read.ptx.sreg.warpid : i32
+  %34 = nvvm.read.ptx.sreg.warpid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid
-  %34 = nvvm.read.ptx.sreg.nwarpid : i32
+  %35 = nvvm.read.ptx.sreg.nwarpid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid
-  %35 = nvvm.read.ptx.sreg.smid : i32
+  %36 = nvvm.read.ptx.sreg.smid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid
-  %36 = nvvm.read.ptx.sreg.nsmid : i32
+  %37 = nvvm.read.ptx.sreg.nsmid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid
-  %37 = nvvm.read.ptx.sreg.gridid : i32
+  %38 = nvvm.read.ptx.sreg.gridid : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg0
-  %38 = nvvm.read.ptx.sreg.envreg0 : i32
+  %39 = nvvm.read.ptx.sreg.envreg0 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg1
-  %39 = nvvm.read.ptx.sreg.envreg1 : i32
+  %40 = nvvm.read.ptx.sreg.envreg1 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg2
-  %40 = nvvm.read.ptx.sreg.envreg2 : i32
+  %41 = nvvm.read.ptx.sreg.envreg2 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg3
-  %41 = nvvm.read.ptx.sreg.envreg3 : i32
+  %42 = nvvm.read.ptx.sreg.envreg3 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg4
-  %42 = nvvm.read.ptx.sreg.envreg4 : i32
+  %43 = nvvm.read.ptx.sreg.envreg4 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg5
-  %43 = nvvm.read.ptx.sreg.envreg5 : i32
+  %44 = nvvm.read.ptx.sreg.envreg5 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg6
-  %44 = nvvm.read.ptx.sreg.envreg6 : i32
+  %45 = nvvm.read.ptx.sreg.envreg6 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg7
-  %45 = nvvm.read.ptx.sreg.envreg7 : i32
+  %46 = nvvm.read.ptx.sreg.envreg7 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg8
-  %46 = nvvm.read.ptx.sreg.envreg8 : i32
+  %47 = nvvm.read.ptx.sreg.envreg8 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg9
-  %47 = nvvm.read.ptx.sreg.envreg9 : i32
+  %48 = nvvm.read.ptx.sreg.envreg9 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg10
-  %48 = nvvm.read.ptx.sreg.envreg10 : i32
+  %49 = nvvm.read.ptx.sreg.envreg10 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg11
-  %49 = nvvm.read.ptx.sreg.envreg11 : i32
+  %50 = nvvm.read.ptx.sreg.envreg11 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg12
-  %50 = nvvm.read.ptx.sreg.envreg12 : i32
+  %51 = nvvm.read.ptx.sreg.envreg12 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg13
-  %51 = nvvm.read.ptx.sreg.envreg13 : i32
+  %52 = nvvm.read.ptx.sreg.envreg13 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg14
-  %52 = nvvm.read.ptx.sreg.envreg14 : i32
+  %53 = nvvm.read.ptx.sreg.envreg14 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg15
-  %53 = nvvm.read.ptx.sreg.envreg15 : i32
+  %54 = nvvm.read.ptx.sreg.envreg15 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg16
-  %54 = nvvm.read.ptx.sreg.envreg16 : i32
+  %55 = nvvm.read.ptx.sreg.envreg16 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg17
-  %55 = nvvm.read.ptx.sreg.envreg17 : i32
+  %56 = nvvm.read.ptx.sreg.envreg17 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg18
-  %56 = nvvm.read.ptx.sreg.envreg18 : i32
+  %57 = nvvm.read.ptx.sreg.envreg18 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg19
-  %57 = nvvm.read.ptx.sreg.envreg19 : i32
+  %58 = nvvm.read.ptx.sreg.envreg19 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg20
-  %58 = nvvm.read.ptx.sreg.envreg20 : i32
+  %59 = nvvm.read.ptx.sreg.envreg20 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg21
-  %59 = nvvm.read.ptx.sreg.envreg21 : i32
+  %60 = nvvm.read.ptx.sreg.envreg21 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg22
-  %60 = nvvm.read.ptx.sreg.envreg22 : i32
+  %61 = nvvm.read.ptx.sreg.envreg22 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg23
-  %61 = nvvm.read.ptx.sreg.envreg23 : i32
+  %62 = nvvm.read.ptx.sreg.envreg23 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg24
-  %62 = nvvm.read.ptx.sreg.envreg24 : i32
+  %63 = nvvm.read.ptx.sreg.envreg24 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg25
-  %63 = nvvm.read.ptx.sreg.envreg25 : i32
+  %64 = nvvm.read.ptx.sreg.envreg25 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg26
-  %64 = nvvm.read.ptx.sreg.envreg26 : i32
+  %65 = nvvm.read.ptx.sreg.envreg26 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg27
-  %65 = nvvm.read.ptx.sreg.envreg27 : i32
+  %66 = nvvm.read.ptx.sreg.envreg27 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg28
-  %66 = nvvm.read.ptx.sreg.envreg28 : i32
+  %67 = nvvm.read.ptx.sreg.envreg28 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg29
-  %67 = nvvm.read.ptx.sreg.envreg29 : i32
+  %68 = nvvm.read.ptx.sreg.envreg29 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg30
-  %68 = nvvm.read.ptx.sreg.envreg30 : i32
+  %69 = nvvm.read.ptx.sreg.envreg30 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg31
-  %69 = nvvm.read.ptx.sreg.envreg31 : i32
+  %70 = nvvm.read.ptx.sreg.envreg31 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq
-  %70 = nvvm.read.ptx.sreg.lanemask.eq : i32
+  %71 = nvvm.read.ptx.sreg.lanemask.eq : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le
-  %71 = nvvm.read.ptx.sreg.lanemask.le : i32
+  %72 = nvvm.read.ptx.sreg.lanemask.le : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt
-  %72 = nvvm.read.ptx.sreg.lanemask.lt : i32
+  %73 = nvvm.read.ptx.sreg.lanemask.lt : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge
-  %73 = nvvm.read.ptx.sreg.lanemask.ge : i32
+  %74 = nvvm.read.ptx.sreg.lanemask.ge : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
-  %74 = nvvm.read.ptx.sreg.lanemask.gt : i32
+  %75 = nvvm.read.ptx.sreg.lanemask.gt : i32
   llvm.return %1 : i32
 }
 



More information about the Mlir-commits mailing list