[llvm] [clang] [NVPTX} Add builtin support for 'globaltimer' (PR #79765)

via cfe-commits cfe-commits at lists.llvm.org
Sun Jan 28 11:06:43 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-ir

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
This patch adds support for `globaltimer` to match `clock` and
`clock64`. See the PTX ISA reference fro details. This patch does not
implement the `hi` or `lo` variants for brevity as they can be obtained
from this with the cost of an additional register.
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer-globaltimer-lo-globaltimer-hi


---
Full diff: https://github.com/llvm/llvm-project/pull/79765.diff


5 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsNVPTX.def (+1) 
- (modified) clang/test/CodeGen/builtins-nvptx.c (+3-1) 
- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+2) 
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+2) 
- (modified) llvm/test/CodeGen/NVPTX/intrinsics.ll (+11) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index 0f2e8260143be78..57a229ded49f886 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -146,6 +146,7 @@ BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")
 
 BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
 BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
+BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")
 
 BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
 BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index 353f3ebb608c2b1..5aab6bee5b1cc1d 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -134,11 +134,13 @@ __device__ long long read_clocks() {
 
 // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
 // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
 
   int a = __nvvm_read_ptx_sreg_clock();
   long long b = __nvvm_read_ptx_sreg_clock64();
+  long long c = __nvvm_read_ptx_sreg_globaltimer();
 
-  return a + b;
+  return a + b + c;
 }
 
 __device__ int read_pms() {
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 5a5ba2592e1467e..8c9ed4a349ba998 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4506,6 +4506,8 @@ def int_nvvm_read_ptx_sreg_lanemask_gt :
 def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
 
+def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">;
+
 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
 def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 33f1e4a43e072af..5c509b50411701a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6364,6 +6364,8 @@ def INT_PTX_SREG_CLOCK :
     PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
 def INT_PTX_SREG_CLOCK64 :
     PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
+def INT_PTX_SREG_GLOBALTIMER :
+    PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
 
 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index c09c7a72fd10181..ba521b2f11cbbec 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -133,6 +133,17 @@ define i64 @test_clock64() {
   ret i64 %ret
 }
 
+; CHECK-LABEL: test_globaltimer
+define i64 @test_globaltimer() {
+; CHECK: mov.u64         %r{{.*}}, %globaltimer;
+  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
+; CHECK: mov.u64         %r{{.*}}, %globaltimer;
+  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
+  %ret = add i64 %a, %b
+; CHECK: ret
+  ret i64 %ret
+}
+
 declare float @llvm.fabs.f32(float)
 declare double @llvm.fabs.f64(double)
 declare float @llvm.nvvm.sqrt.f(float)

``````````

</details>


https://github.com/llvm/llvm-project/pull/79765


More information about the cfe-commits mailing list