[Mlir-commits] [mlir] 0263c15 - [MLIR][NVVM] Add globaltimer_lo support in NVVM Dialect and NVPTX backend (#154672)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Aug 25 00:42:19 PDT 2025
Author: Dharuni R Acharya
Date: 2025-08-25T13:12:16+05:30
New Revision: 0263c15f874d4fc33308db93023e583b816b920e
URL: https://github.com/llvm/llvm-project/commit/0263c15f874d4fc33308db93023e583b816b920e
DIFF: https://github.com/llvm/llvm-project/commit/0263c15f874d4fc33308db93023e583b816b920e.diff
LOG: [MLIR][NVVM] Add globaltimer_lo support in NVVM Dialect and NVPTX backend (#154672)
This patch adds support for reading the low 32 bits of the global timer
(the PTX `%globaltimer_lo` special register) in the NVVM dialect and the
NVPTX backend. It adds the `NVVM_GlobalTimerLoOp` operation to the NVVM
dialect and the `int_nvvm_read_ptx_sreg_globaltimer_lo` intrinsic to the
NVPTX backend.
Lit tests covering both the new intrinsic and the new op have been added.
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsNVVM.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/NVPTX/intrinsics.ll
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
mlir/test/Target/LLVMIR/nvvmir.mlir
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 130fa27e4f870..7b40841e45d0d 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1943,6 +1943,7 @@ def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
+def int_nvvm_read_ptx_sreg_globaltimer_lo : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 4ab30a5b5f5e7..cba14066f0c0b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -4358,10 +4358,12 @@ let hasSideEffects = 1 in {
def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+ def SREG_GLOBALTIMER_LO : PTX_READ_SREG_R32<"globaltimer_lo", int_nvvm_read_ptx_sreg_globaltimer_lo>;
}
def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>;
def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>;
+def: Pat <(i32 (readsteadycounter)), (SREG_GLOBALTIMER_LO)>;
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4ed50632251cb..6bdb8ead7a64a 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -267,6 +267,23 @@ define i64 @test_globaltimer() {
ret i64 %ret
}
+define i32 @test_globaltimer_lo(){
+; CHECK-LABEL: test_globaltimer_lo(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.u32 %r1, %globaltimer_lo;
+; CHECK-NEXT: mov.u32 %r2, %globaltimer_lo;
+; CHECK-NEXT: add.s32 %r3, %r1, %r2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
+ %a = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+ %b = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+ %ret = add i32 %a, %b
+ ret i32 %ret
+}
+
define i64 @test_cyclecounter() {
; CHECK-LABEL: test_cyclecounter(
; CHECK: {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 380302e26b1cb..85e6e064f22a9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -267,6 +267,7 @@ def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster
def NVVM_ClockOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock">;
def NVVM_Clock64Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock64">;
def NVVM_GlobalTimerOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer">;
+def NVVM_GlobalTimerLoOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer.lo">;
//===----------------------------------------------------------------------===//
// envreg registers
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 5f0bda3aa2dfe..fa7dd1daf96ed 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -64,92 +64,94 @@ llvm.func @nvvm_special_regs() -> i32 {
%30 = nvvm.read.ptx.sreg.clock64 : i64
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer
%31 = nvvm.read.ptx.sreg.globaltimer : i64
- // CHECK: %32 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
- %32 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
+ // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+ %32 = nvvm.read.ptx.sreg.globaltimer.lo : i32
+ // CHECK: %33 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %33 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid
- %33 = nvvm.read.ptx.sreg.warpid : i32
+ %34 = nvvm.read.ptx.sreg.warpid : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid
- %34 = nvvm.read.ptx.sreg.nwarpid : i32
+ %35 = nvvm.read.ptx.sreg.nwarpid : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid
- %35 = nvvm.read.ptx.sreg.smid : i32
+ %36 = nvvm.read.ptx.sreg.smid : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid
- %36 = nvvm.read.ptx.sreg.nsmid : i32
+ %37 = nvvm.read.ptx.sreg.nsmid : i32
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid
- %37 = nvvm.read.ptx.sreg.gridid : i32
+ %38 = nvvm.read.ptx.sreg.gridid : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg0
- %38 = nvvm.read.ptx.sreg.envreg0 : i32
+ %39 = nvvm.read.ptx.sreg.envreg0 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg1
- %39 = nvvm.read.ptx.sreg.envreg1 : i32
+ %40 = nvvm.read.ptx.sreg.envreg1 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg2
- %40 = nvvm.read.ptx.sreg.envreg2 : i32
+ %41 = nvvm.read.ptx.sreg.envreg2 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg3
- %41 = nvvm.read.ptx.sreg.envreg3 : i32
+ %42 = nvvm.read.ptx.sreg.envreg3 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg4
- %42 = nvvm.read.ptx.sreg.envreg4 : i32
+ %43 = nvvm.read.ptx.sreg.envreg4 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg5
- %43 = nvvm.read.ptx.sreg.envreg5 : i32
+ %44 = nvvm.read.ptx.sreg.envreg5 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg6
- %44 = nvvm.read.ptx.sreg.envreg6 : i32
+ %45 = nvvm.read.ptx.sreg.envreg6 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg7
- %45 = nvvm.read.ptx.sreg.envreg7 : i32
+ %46 = nvvm.read.ptx.sreg.envreg7 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg8
- %46 = nvvm.read.ptx.sreg.envreg8 : i32
+ %47 = nvvm.read.ptx.sreg.envreg8 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg9
- %47 = nvvm.read.ptx.sreg.envreg9 : i32
+ %48 = nvvm.read.ptx.sreg.envreg9 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg10
- %48 = nvvm.read.ptx.sreg.envreg10 : i32
+ %49 = nvvm.read.ptx.sreg.envreg10 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg11
- %49 = nvvm.read.ptx.sreg.envreg11 : i32
+ %50 = nvvm.read.ptx.sreg.envreg11 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg12
- %50 = nvvm.read.ptx.sreg.envreg12 : i32
+ %51 = nvvm.read.ptx.sreg.envreg12 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg13
- %51 = nvvm.read.ptx.sreg.envreg13 : i32
+ %52 = nvvm.read.ptx.sreg.envreg13 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg14
- %52 = nvvm.read.ptx.sreg.envreg14 : i32
+ %53 = nvvm.read.ptx.sreg.envreg14 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg15
- %53 = nvvm.read.ptx.sreg.envreg15 : i32
+ %54 = nvvm.read.ptx.sreg.envreg15 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg16
- %54 = nvvm.read.ptx.sreg.envreg16 : i32
+ %55 = nvvm.read.ptx.sreg.envreg16 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg17
- %55 = nvvm.read.ptx.sreg.envreg17 : i32
+ %56 = nvvm.read.ptx.sreg.envreg17 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg18
- %56 = nvvm.read.ptx.sreg.envreg18 : i32
+ %57 = nvvm.read.ptx.sreg.envreg18 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg19
- %57 = nvvm.read.ptx.sreg.envreg19 : i32
+ %58 = nvvm.read.ptx.sreg.envreg19 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg20
- %58 = nvvm.read.ptx.sreg.envreg20 : i32
+ %59 = nvvm.read.ptx.sreg.envreg20 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg21
- %59 = nvvm.read.ptx.sreg.envreg21 : i32
+ %60 = nvvm.read.ptx.sreg.envreg21 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg22
- %60 = nvvm.read.ptx.sreg.envreg22 : i32
+ %61 = nvvm.read.ptx.sreg.envreg22 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg23
- %61 = nvvm.read.ptx.sreg.envreg23 : i32
+ %62 = nvvm.read.ptx.sreg.envreg23 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg24
- %62 = nvvm.read.ptx.sreg.envreg24 : i32
+ %63 = nvvm.read.ptx.sreg.envreg24 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg25
- %63 = nvvm.read.ptx.sreg.envreg25 : i32
+ %64 = nvvm.read.ptx.sreg.envreg25 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg26
- %64 = nvvm.read.ptx.sreg.envreg26 : i32
+ %65 = nvvm.read.ptx.sreg.envreg26 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg27
- %65 = nvvm.read.ptx.sreg.envreg27 : i32
+ %66 = nvvm.read.ptx.sreg.envreg27 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg28
- %66 = nvvm.read.ptx.sreg.envreg28 : i32
+ %67 = nvvm.read.ptx.sreg.envreg28 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg29
- %67 = nvvm.read.ptx.sreg.envreg29 : i32
+ %68 = nvvm.read.ptx.sreg.envreg29 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg30
- %68 = nvvm.read.ptx.sreg.envreg30 : i32
+ %69 = nvvm.read.ptx.sreg.envreg30 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg31
- %69 = nvvm.read.ptx.sreg.envreg31 : i32
+ %70 = nvvm.read.ptx.sreg.envreg31 : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq
- %70 = nvvm.read.ptx.sreg.lanemask.eq : i32
+ %71 = nvvm.read.ptx.sreg.lanemask.eq : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le
- %71 = nvvm.read.ptx.sreg.lanemask.le : i32
+ %72 = nvvm.read.ptx.sreg.lanemask.le : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt
- %72 = nvvm.read.ptx.sreg.lanemask.lt : i32
+ %73 = nvvm.read.ptx.sreg.lanemask.lt : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge
- %73 = nvvm.read.ptx.sreg.lanemask.ge : i32
+ %74 = nvvm.read.ptx.sreg.lanemask.ge : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
- %74 = nvvm.read.ptx.sreg.lanemask.gt : i32
+ %75 = nvvm.read.ptx.sreg.lanemask.gt : i32
llvm.return %1 : i32
}
More information about the Mlir-commits mailing list