[llvm] [NVPTX] Add ranges to intrinsic definitions, cleanup NVVMIntrRange (PR #138338)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 14:40:09 PDT 2025
================
@@ -74,10 +75,149 @@ define ptx_kernel i32 @test_inlined() "nvvm.maxntid"="4" {
ret i32 %1
}
+; Kernel carrying the "nvvm.maxclusterrank"="8" attribute. It reads each of the
+; cluster.ctaid.{x,y,z} and cluster.nctaid.{x,y,z} special registers once and
+; adds the six results together so that every read feeds the returned value.
+; The expected output below has range(i32 0, 8) on every cluster.ctaid read and
+; range(i32 1, 9) on every cluster.nctaid read.
+; NOTE(review): the bounds presumably come straight from the attribute value 8;
+; confirm against the range-annotation pass.
+define ptx_kernel i32 @test_cluster_ctaid() "nvvm.maxclusterrank"="8" {
+; CHECK-LABEL: define ptx_kernel i32 @test_cluster_ctaid(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 8) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 8) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 8) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 1, 9) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 9) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 1, 9) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP15]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %11
+}
+
+; Kernel carrying the "nvvm.cluster_dim"="4,4,1" attribute. It performs the same
+; six cluster.ctaid / cluster.nctaid reads and sums them into the return value.
+; The expected output below has range(i32 0, 16) on every cluster.ctaid read and
+; range(i32 1, 17) on every cluster.nctaid read, identically for x, y and z.
+; NOTE(review): 16 equals 4*4*1, so the bound looks like the total number of
+; CTAs in the cluster rather than a per-dimension limit; confirm against the
+; range-annotation pass.
+define ptx_kernel i32 @test_cluster_dim() "nvvm.cluster_dim"="4,4,1" {
+; CHECK-LABEL: define ptx_kernel i32 @test_cluster_dim(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 16) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 16) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 16) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 1, 17) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 17) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 1, 17) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP15]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %11
+}
+
+
+; DEFAULT-DAG: declare noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; DEFAULT-DAG: declare noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; DEFAULT-DAG: declare noundef range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; DEFAULT-DAG: declare noundef range(i32 0, 1) i32 @llvm.nvvm.read.ptx.sreg.tid.w()
+
+; DEFAULT-DAG: declare noundef range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+; DEFAULT-DAG: declare noundef range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+; DEFAULT-DAG: declare noundef range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+; DEFAULT-DAG: declare noundef range(i32 0, 1) i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
+
+; DEFAULT-DAG: declare noundef range(i32 0, 32) i32 @llvm.nvvm.read.ptx.sreg.laneid()
+; DEFAULT-DAG: declare noundef range(i32 32, 33) i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+
+; DEFAULT-DAG: declare noundef range(i32 0, 2147483647) i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
----------------
Artem-B wrote:
Nit: Technically this is not the right value for sm_20, which had a max of 65535.
On the other hand, it's not invalid, just suboptimal, and we do not care about sm_20 these days.
I'd just add a note about that where we define the constant, and move on.
https://github.com/llvm/llvm-project/pull/138338
More information about the llvm-commits mailing list