[llvm] [NVPTX] Add ranges to intrinsic definitions, cleanup NVVMIntrRange (PR #138338)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Sat May 3 17:04:37 PDT 2025
================
@@ -74,10 +75,149 @@ define ptx_kernel i32 @test_inlined() "nvvm.maxntid"="4" {
ret i32 %1
}
+define ptx_kernel i32 @test_cluster_ctaid() "nvvm.maxclusterrank"="8" {
+; CHECK-LABEL: define ptx_kernel i32 @test_cluster_ctaid(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 8) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 8) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 8) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 1, 9) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 9) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 1, 9) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP15]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %11
+}
+
+define ptx_kernel i32 @test_cluster_dim() "nvvm.cluster_dim"="4,4,1" {
+; CHECK-LABEL: define ptx_kernel i32 @test_cluster_dim(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 16) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call range(i32 0, 16) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 16) i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 1, 17) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+; CHECK-NEXT: [[TMP6:%.*]] = call range(i32 1, 17) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+; CHECK-NEXT: [[TMP7:%.*]] = call range(i32 1, 17) i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[TMP6]]
+; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP13]], [[TMP7]]
+; CHECK-NEXT: ret i32 [[TMP15]]
+;
+ %1 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
+ %2 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
+ %3 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
+ %4 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
+ %5 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
+ %6 = call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
+ %7 = add i32 %1, %2
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %4
+ %10 = add i32 %9, %5
+ %11 = add i32 %10, %6
+ ret i32 %11
+}
+
+
+; DEFAULT-DAG: declare noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; DEFAULT-DAG: declare noundef range(i32 0, 1024) i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; DEFAULT-DAG: declare noundef range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+; DEFAULT-DAG: declare noundef range(i32 0, 1) i32 @llvm.nvvm.read.ptx.sreg.tid.w()
+
+; DEFAULT-DAG: declare noundef range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+; DEFAULT-DAG: declare noundef range(i32 1, 1025) i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+; DEFAULT-DAG: declare noundef range(i32 1, 65) i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+; DEFAULT-DAG: declare noundef range(i32 0, 1) i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
+
+; DEFAULT-DAG: declare noundef range(i32 0, 32) i32 @llvm.nvvm.read.ptx.sreg.laneid()
+; DEFAULT-DAG: declare noundef range(i32 32, 33) i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+
+; DEFAULT-DAG: declare noundef range(i32 0, 2147483647) i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
----------------
AlexMaclean wrote:
Sounds good. Added a comment in IntrinsicsNVVM.td where MAX_GRID_SIZE_X is defined.
https://github.com/llvm/llvm-project/pull/138338
More information about the llvm-commits
mailing list