[Mlir-commits] [mlir] 0263c15 - [MLIR][NVVM] Add globaltimer_lo support in NVVM Dialect and NVPTX backend (#154672)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Mon Aug 25 00:42:19 PDT 2025


Author: Dharuni R Acharya
Date: 2025-08-25T13:12:16+05:30
New Revision: 0263c15f874d4fc33308db93023e583b816b920e

URL: https://github.com/llvm/llvm-project/commit/0263c15f874d4fc33308db93023e583b816b920e
DIFF: https://github.com/llvm/llvm-project/commit/0263c15f874d4fc33308db93023e583b816b920e.diff

LOG: [MLIR][NVVM] Add globaltimer_lo support in NVVM Dialect and NVPTX backend (#154672)

This patch adds support for reading the global timer low register in the
NVVM dialect and NVPTX backend. This change includes adding the
`NVVM_GlobalTimerLoOp` operation to NVVM dialect and 
`int_nvvm_read_ptx_sreg_globaltimer_lo` intrinsic to the NVPTX backend.

All the lit tests have been added.

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsNVVM.td
    llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/test/CodeGen/NVPTX/intrinsics.ll
    mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
    mlir/test/Target/LLVMIR/nvvmir.mlir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 130fa27e4f870..7b40841e45d0d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1943,6 +1943,7 @@ def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
 
 def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
+def int_nvvm_read_ptx_sreg_globaltimer_lo : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
 
 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;

diff  --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 4ab30a5b5f5e7..cba14066f0c0b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -4358,10 +4358,12 @@ let hasSideEffects = 1 in {
   def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
   def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
   def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+  def SREG_GLOBALTIMER_LO : PTX_READ_SREG_R32<"globaltimer_lo", int_nvvm_read_ptx_sreg_globaltimer_lo>;
 }
 
 def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>;
 def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>;
+def: Pat <(i32 (readsteadycounter)), (SREG_GLOBALTIMER_LO)>;
 
 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;

diff  --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4ed50632251cb..6bdb8ead7a64a 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -267,6 +267,23 @@ define i64 @test_globaltimer() {
   ret i64 %ret
 }
 
+define i32 @test_globaltimer_lo(){
+; CHECK-LABEL: test_globaltimer_lo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u32 %r1, %globaltimer_lo;
+; CHECK-NEXT:    mov.u32 %r2, %globaltimer_lo;
+; CHECK-NEXT:    add.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %ret = add i32 %a, %b
+  ret i32 %ret
+}
+
 define i64 @test_cyclecounter() {
 ; CHECK-LABEL: test_cyclecounter(
 ; CHECK:       {

diff  --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 380302e26b1cb..85e6e064f22a9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -267,6 +267,7 @@ def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster
 def NVVM_ClockOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock">;
 def NVVM_Clock64Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock64">;
 def NVVM_GlobalTimerOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer">;
+def NVVM_GlobalTimerLoOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer.lo">;
 
 //===----------------------------------------------------------------------===//
 // envreg registers

diff  --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 5f0bda3aa2dfe..fa7dd1daf96ed 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -64,92 +64,94 @@ llvm.func @nvvm_special_regs() -> i32 {
   %30 = nvvm.read.ptx.sreg.clock64 : i64
   // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer
   %31 = nvvm.read.ptx.sreg.globaltimer : i64
-  // CHECK: %32 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-  %32 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %32 = nvvm.read.ptx.sreg.globaltimer.lo : i32
+  // CHECK: %33 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %33 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid
-  %33 = nvvm.read.ptx.sreg.warpid : i32
+  %34 = nvvm.read.ptx.sreg.warpid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid
-  %34 = nvvm.read.ptx.sreg.nwarpid : i32
+  %35 = nvvm.read.ptx.sreg.nwarpid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid
-  %35 = nvvm.read.ptx.sreg.smid : i32
+  %36 = nvvm.read.ptx.sreg.smid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid
-  %36 = nvvm.read.ptx.sreg.nsmid : i32
+  %37 = nvvm.read.ptx.sreg.nsmid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid
-  %37 = nvvm.read.ptx.sreg.gridid : i32
+  %38 = nvvm.read.ptx.sreg.gridid : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg0
-  %38 = nvvm.read.ptx.sreg.envreg0 : i32
+  %39 = nvvm.read.ptx.sreg.envreg0 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg1
-  %39 = nvvm.read.ptx.sreg.envreg1 : i32
+  %40 = nvvm.read.ptx.sreg.envreg1 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg2
-  %40 = nvvm.read.ptx.sreg.envreg2 : i32
+  %41 = nvvm.read.ptx.sreg.envreg2 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg3
-  %41 = nvvm.read.ptx.sreg.envreg3 : i32
+  %42 = nvvm.read.ptx.sreg.envreg3 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg4
-  %42 = nvvm.read.ptx.sreg.envreg4 : i32
+  %43 = nvvm.read.ptx.sreg.envreg4 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg5
-  %43 = nvvm.read.ptx.sreg.envreg5 : i32
+  %44 = nvvm.read.ptx.sreg.envreg5 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg6
-  %44 = nvvm.read.ptx.sreg.envreg6 : i32
+  %45 = nvvm.read.ptx.sreg.envreg6 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg7
-  %45 = nvvm.read.ptx.sreg.envreg7 : i32
+  %46 = nvvm.read.ptx.sreg.envreg7 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg8
-  %46 = nvvm.read.ptx.sreg.envreg8 : i32
+  %47 = nvvm.read.ptx.sreg.envreg8 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg9
-  %47 = nvvm.read.ptx.sreg.envreg9 : i32
+  %48 = nvvm.read.ptx.sreg.envreg9 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg10
-  %48 = nvvm.read.ptx.sreg.envreg10 : i32
+  %49 = nvvm.read.ptx.sreg.envreg10 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg11
-  %49 = nvvm.read.ptx.sreg.envreg11 : i32
+  %50 = nvvm.read.ptx.sreg.envreg11 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg12
-  %50 = nvvm.read.ptx.sreg.envreg12 : i32
+  %51 = nvvm.read.ptx.sreg.envreg12 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg13
-  %51 = nvvm.read.ptx.sreg.envreg13 : i32
+  %52 = nvvm.read.ptx.sreg.envreg13 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg14
-  %52 = nvvm.read.ptx.sreg.envreg14 : i32
+  %53 = nvvm.read.ptx.sreg.envreg14 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg15
-  %53 = nvvm.read.ptx.sreg.envreg15 : i32
+  %54 = nvvm.read.ptx.sreg.envreg15 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg16
-  %54 = nvvm.read.ptx.sreg.envreg16 : i32
+  %55 = nvvm.read.ptx.sreg.envreg16 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg17
-  %55 = nvvm.read.ptx.sreg.envreg17 : i32
+  %56 = nvvm.read.ptx.sreg.envreg17 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg18
-  %56 = nvvm.read.ptx.sreg.envreg18 : i32
+  %57 = nvvm.read.ptx.sreg.envreg18 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg19
-  %57 = nvvm.read.ptx.sreg.envreg19 : i32
+  %58 = nvvm.read.ptx.sreg.envreg19 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg20
-  %58 = nvvm.read.ptx.sreg.envreg20 : i32
+  %59 = nvvm.read.ptx.sreg.envreg20 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg21
-  %59 = nvvm.read.ptx.sreg.envreg21 : i32
+  %60 = nvvm.read.ptx.sreg.envreg21 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg22
-  %60 = nvvm.read.ptx.sreg.envreg22 : i32
+  %61 = nvvm.read.ptx.sreg.envreg22 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg23
-  %61 = nvvm.read.ptx.sreg.envreg23 : i32
+  %62 = nvvm.read.ptx.sreg.envreg23 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg24
-  %62 = nvvm.read.ptx.sreg.envreg24 : i32
+  %63 = nvvm.read.ptx.sreg.envreg24 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg25
-  %63 = nvvm.read.ptx.sreg.envreg25 : i32
+  %64 = nvvm.read.ptx.sreg.envreg25 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg26
-  %64 = nvvm.read.ptx.sreg.envreg26 : i32
+  %65 = nvvm.read.ptx.sreg.envreg26 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg27
-  %65 = nvvm.read.ptx.sreg.envreg27 : i32
+  %66 = nvvm.read.ptx.sreg.envreg27 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg28
-  %66 = nvvm.read.ptx.sreg.envreg28 : i32
+  %67 = nvvm.read.ptx.sreg.envreg28 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg29
-  %67 = nvvm.read.ptx.sreg.envreg29 : i32
+  %68 = nvvm.read.ptx.sreg.envreg29 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg30
-  %68 = nvvm.read.ptx.sreg.envreg30 : i32
+  %69 = nvvm.read.ptx.sreg.envreg30 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg31
-  %69 = nvvm.read.ptx.sreg.envreg31 : i32
+  %70 = nvvm.read.ptx.sreg.envreg31 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq
-  %70 = nvvm.read.ptx.sreg.lanemask.eq : i32
+  %71 = nvvm.read.ptx.sreg.lanemask.eq : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le
-  %71 = nvvm.read.ptx.sreg.lanemask.le : i32
+  %72 = nvvm.read.ptx.sreg.lanemask.le : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt
-  %72 = nvvm.read.ptx.sreg.lanemask.lt : i32
+  %73 = nvvm.read.ptx.sreg.lanemask.lt : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge
-  %73 = nvvm.read.ptx.sreg.lanemask.ge : i32
+  %74 = nvvm.read.ptx.sreg.lanemask.ge : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
-  %74 = nvvm.read.ptx.sreg.lanemask.gt : i32
+  %75 = nvvm.read.ptx.sreg.lanemask.gt : i32
   llvm.return %1 : i32
 }
 


        


More information about the Mlir-commits mailing list