[PATCH] [ValueTracking] Consider the bounds of PTX special registers

Jingyue Wu jingyue at google.com
Sun Jun 15 17:18:02 PDT 2014


Hi eliben, jholewinski, meheff,

Some PTX special registers are bounded per CUDA programming guide.
Leveraing the bounds of these special registers can lead to more precise
value analysis.

Add two new tests in test/Transforms/InstCombine/intrinsics.ll

Depends on D4144

http://reviews.llvm.org/D4150

Files:
  lib/Analysis/ValueTracking.cpp
  test/Transforms/InstCombine/intrinsics.ll

Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -753,6 +753,29 @@
       case Intrinsic::x86_sse42_crc32_64_64:
         KnownZero = APInt::getHighBitsSet(64, 32);
         break;
+      // Some PTX special registers are bounded per CUDA programming guide
+      // (http://docs.nvidia.com/cuda/cuda-c-programming-guide/
+      // index.html#compute-capabilities).
+      // Leveraing the bounds of these special registers can lead to more
+      // precise value analysis.
+      case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+      case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+        // threadIdx.x, threadIdx.y < 1024
+        KnownZero = APInt::getHighBitsSet(32, 32 - 10);
+        break;
+      case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+        // threadIdx.z < 64
+        KnownZero = APInt::getHighBitsSet(32, 32 - 6);
+        break;
+      case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
+        // blockIdx.x < 2^31
+        KnownZero = APInt::getHighBitsSet(32, 32 - 31);
+        break;
+      case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
+      case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
+        // blockIdx.y, blockIdx.z < 65536
+        KnownZero = APInt::getHighBitsSet(32, 32 - 16);
+        break;
       }
     }
     break;
Index: test/Transforms/InstCombine/intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/intrinsics.ll
+++ test/Transforms/InstCombine/intrinsics.ll
@@ -9,6 +9,12 @@
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
 declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
 
 define i8 @uaddtest1(i8 %A, i8 %B) {
   %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -256,3 +262,41 @@
 ; CHECK-LABEL: @cttz_select(
 ; CHECK: select i1 %tobool, i32 %cttz, i32 32
 }
+
+define void @nvvm_thread_idx(i32* %output_x, i32* %output_y, i32* %output_z) {
+; CHECK-LABEL: @nvvm_thread_idx(
+  %tid_x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %tid_y = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+  %tid_z = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; 0 <= threadIdx.x, threadIdx.y and threadIdx.z < 2^31.
+; Therefore, add i32 threadIdx.x|y|z, 5 has no unsigned wrap.
+  %x = add i32 %tid_x, 5
+  %y = add i32 %tid_y, 5
+  %z = add i32 %tid_z, 5
+; CHECK: add nuw
+; CHECK: add nuw
+; CHECK: add nuw
+  store i32 %x, i32* %output_x
+  store i32 %y, i32* %output_y
+  store i32 %z, i32* %output_z
+  ret void
+}
+
+define void @nvvm_block_idx(i32* %output_x, i32* %output_y, i32* %output_z) {
+; CHECK-LABEL: @nvvm_block_idx(
+  %bid_x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+  %bid_y = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+  %bid_z = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+; 0 <= blockIdx.x, blockIdx.y and blockIdx.z < 2^31.
+; Therefore, add i32 blockIdx.x|y|z, 5 has no unsigned wrap.
+  %x = add i32 %bid_x, 5
+  %y = add i32 %bid_y, 5
+  %z = add i32 %bid_z, 5
+; CHECK: add nuw
+; CHECK: add nuw
+; CHECK: add nuw
+  store i32 %x, i32* %output_x
+  store i32 %y, i32* %output_y
+  store i32 %z, i32* %output_z
+  ret void
+}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D4150.10434.patch
Type: text/x-patch
Size: 3532 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140616/7a2de8c4/attachment.bin>


More information about the llvm-commits mailing list