[llvm] 2ac8e6b - [NVPTX] Implement `__builtin_readcyclecounter` on NVPTX (#81344)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 12 05:07:53 PST 2024
Author: Joseph Huber
Date: 2024-02-12T07:07:48-06:00
New Revision: 2ac8e6b7f5b8f495f496a55512e5ea452beb0bca
URL: https://github.com/llvm/llvm-project/commit/2ac8e6b7f5b8f495f496a55512e5ea452beb0bca
DIFF: https://github.com/llvm/llvm-project/commit/2ac8e6b7f5b8f495f496a55512e5ea452beb0bca.diff
LOG: [NVPTX] Implement `__builtin_readcyclecounter` on NVPTX (#81344)
Summary:
This patch simply states that `__builtin_readcyclecounter` is legal on
NVPTX and makes it return the value from the `clock64` sreg. The timer
intrinsics are marked as having side effects, which is desirable for
timing primitives and required to pattern match the intrinsic DAG.
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/NVPTX/intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 5c24f00dbca0e2..80a67ca8e368c6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -489,6 +489,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+
setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 2330d7213c26dc..133e2827008a87 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6372,12 +6372,16 @@ def INT_PTX_SREG_LANEMASK_GE :
def INT_PTX_SREG_LANEMASK_GT :
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
+let hasSideEffects = 1 in {
def INT_PTX_SREG_CLOCK :
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
def INT_PTX_SREG_GLOBALTIMER :
PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+}
+
+def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>;
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 2994f600d45c71..d84ee6754281e5 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -151,6 +151,17 @@ define i64 @test_globaltimer() {
ret i64 %ret
}
+; CHECK-LABEL: test_cyclecounter
+define i64 @test_cyclecounter() {
+; CHECK: mov.u64 %r{{.*}}, %clock64;
+ %a = tail call i64 @llvm.readcyclecounter()
+; CHECK: mov.u64 %r{{.*}}, %clock64;
+ %b = tail call i64 @llvm.readcyclecounter()
+ %ret = add i64 %a, %b
+; CHECK: ret
+ ret i64 %ret
+}
+
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
@@ -166,3 +177,4 @@ declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare void @llvm.nvvm.exit()
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
+declare i64 @llvm.readcyclecounter()
More information about the llvm-commits
mailing list