[clang] e633807 - [NVPTX] Add builtin support for 'globaltimer' (#79765)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 29 12:12:00 PST 2024
Author: Joseph Huber
Date: 2024-01-29T14:11:54-06:00
New Revision: e633807a1fccbed91dbfe1fdc2c78adcaf21d99c
URL: https://github.com/llvm/llvm-project/commit/e633807a1fccbed91dbfe1fdc2c78adcaf21d99c
DIFF: https://github.com/llvm/llvm-project/commit/e633807a1fccbed91dbfe1fdc2c78adcaf21d99c.diff
LOG: [NVPTX] Add builtin support for 'globaltimer' (#79765)
Summary:
This patch adds support for `globaltimer` to match `clock` and
`clock64`. See the PTX ISA reference for details. This patch does not
implement the `hi` or `lo` variants for brevity as they can be obtained
from this with the cost of an additional register.
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer-globaltimer-lo-globaltimer-hi
Added:
Modified:
clang/include/clang/Basic/BuiltinsNVPTX.def
clang/test/CodeGen/builtins-nvptx.c
llvm/include/llvm/IR/IntrinsicsNVVM.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/NVPTX/intrinsics.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index ed67f0877aee37..648c013a3002ef 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -148,6 +148,7 @@ BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
+BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index 4c2cca2f5af4ca..ad7c27f2d60d26 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -134,11 +134,13 @@ __device__ long long read_clocks() {
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
int a = __nvvm_read_ptx_sreg_clock();
long long b = __nvvm_read_ptx_sreg_clock64();
+ long long c = __nvvm_read_ptx_sreg_globaltimer();
- return a + b;
+ return a + b + c;
}
__device__ int read_pms() {
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index e432f43f98a305..d825dc82156432 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4510,6 +4510,8 @@ def int_nvvm_read_ptx_sreg_lanemask_gt :
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
+def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">;
+
def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 87ae1ef6d738bf..2330d7213c26dc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6376,6 +6376,8 @@ def INT_PTX_SREG_CLOCK :
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
+def INT_PTX_SREG_GLOBALTIMER :
+ PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4b7d5c8f239076..7e45b1feb7fd02 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -140,6 +140,17 @@ define void @test_exit() {
ret void
}
+; CHECK-LABEL: test_globaltimer
+define i64 @test_globaltimer() {
+; CHECK: mov.u64 %r{{.*}}, %globaltimer;
+ %a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
+; CHECK: mov.u64 %r{{.*}}, %globaltimer;
+ %b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
+ %ret = add i64 %a, %b
+; CHECK: ret
+ ret i64 %ret
+}
+
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
@@ -154,3 +165,4 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare void @llvm.nvvm.exit()
+declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
More information about the cfe-commits
mailing list