[llvm] r352256 - [NVPTX] Some nvvm.read.ptx.sreg intrinsics should have IntrInaccessibleMemOnly attribute.
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 25 16:28:32 PST 2019
Author: tra
Date: Fri Jan 25 16:28:32 2019
New Revision: 352256
URL: http://llvm.org/viewvc/llvm-project?rev=352256&view=rev
Log:
[NVPTX] Some nvvm.read.ptx.sreg intrinsics should have IntrInaccessibleMemOnly attribute.
These intrinsics may return different values every time they are called
and should not be CSE'd. IntrInaccessibleMemOnly appears to be the right
attribute to model this behavior.
Differential Revision: https://reviews.llvm.org/D57259
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td?rev=352256&r1=352255&r2=352256&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td Fri Jan 25 16:28:32 2019
@@ -3673,11 +3673,19 @@ multiclass PTXReadSRegIntrinsic_v4i32<st
class PTXReadSRegIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
-
class PTXReadSRegIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+// Intrinsics to read registers with non-constant values, i.e. values that
+// can change over the kernel's lifetime. Such reads should not be CSE'd.
+class PTXReadNCSRegIntrinsic_r32<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+class PTXReadNCSRegIntrinsic_r64<string name>
+ : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
+ GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
@@ -3703,13 +3711,13 @@ def int_nvvm_read_ptx_sreg_lanemask_ge :
def int_nvvm_read_ptx_sreg_lanemask_gt :
PTXReadSRegIntrinsic_r32<"lanemask_gt">;
-def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
-def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
-def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
-def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
-def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
-def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
Modified: llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll?rev=352256&r1=352255&r2=352256&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll Fri Jan 25 16:28:32 2019
@@ -94,6 +94,43 @@ define i32 @test_popc16_to_32(i16 %a) {
ret i32 %zext
}
+; Most of the nvvm.read.ptx.sreg.* intrinsics always return the same value and
+; may be CSE'd.
+; CHECK-LABEL: test_tid
+define i32 @test_tid() {
+; CHECK: mov.u32 %r{{.*}}, %tid.x;
+ %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
+ %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %ret = add i32 %a, %b
+; CHECK: ret
+ ret i32 %ret
+}
+
+; Reading clock() or clock64() should not be CSE'd as each read may return
+; a different value.
+; CHECK-LABEL: test_clock
+define i32 @test_clock() {
+; CHECK: mov.u32 %r{{.*}}, %clock;
+ %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
+; CHECK: mov.u32 %r{{.*}}, %clock;
+ %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
+ %ret = add i32 %a, %b
+; CHECK: ret
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test_clock64
+define i64 @test_clock64() {
+; CHECK: mov.u64 %r{{.*}}, %clock64;
+ %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+; CHECK: mov.u64 %r{{.*}}, %clock64;
+ %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+ %ret = add i64 %a, %b
+; CHECK: ret
+ ret i64 %ret
+}
+
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
@@ -103,3 +140,7 @@ declare i64 @llvm.bitreverse.i64(i64)
declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.clock()
+declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
More information about the llvm-commits
mailing list