[llvm] [NVPTX] Basic support for "grid_constant" (PR #96125)

Adam Paszke via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 00:55:52 PDT 2024


================
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes OPT
+; RUN: llc < %s -mcpu=sm_70 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX
+
+; Scalar byval case: the body loads an i32 through %input1, adds %input2, and
+; stores the sum to %out (%n is unused). The opt-side checks require that no
+; alloca appears and that the load reads from an addrspace(101) cast of the
+; argument; the llc-side checks require an ld.param from param_0 with no
+; ld.u32 between the function label and that load.
+define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
+; PTX-LABEL: grid_const_int(
+; PTX-NOT:     ld.u32
+; PTX:         ld.param.{{.*}} [[R2:%.*]], [grid_const_int_param_0];
+; 
+; OPT-LABEL: define void @grid_const_int(
+; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) {
+; OPT-NOT:     alloca
+; OPT:         [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
+; OPT:         [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
+;
+  %tmp = load i32, ptr %input1, align 4
+  %add = add i32 %tmp, %input2
+  store i32 %add, ptr %out
+  ret void
+}
+
+; Aggregate of two i32 fields; used as the byval pointee type by the
+; struct-taking test functions that follow.
+%struct.s = type { i32, i32 }
+
+; Struct byval case: both i32 fields of %input are loaded through GEPs, summed,
+; and the result is stored to %out. The opt-side checks expect the GEPs and
+; loads to operate on an addrspace(101) cast of %input with no alloca; the
+; llc-side checks expect ld.param accesses at param_0 and param_0+4 with no
+; ld.u32 ahead of them.
+define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
+; PTX-LABEL: grid_const_struct(
+; PTX:       {
+; PTX-NOT:     ld.u32
+; PTX:         ld.param.{{.*}} [[R1:%.*]], [grid_const_struct_param_0];
+; PTX:         ld.param.{{.*}} [[R2:%.*]], [grid_const_struct_param_0+4];
+;
+; OPT-LABEL: define void @grid_const_struct(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) {
+; OPT-NOT:     alloca
+; OPT:         [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT:         [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
+; OPT:         [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
+; OPT:         [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
+; OPT:         [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
+;
+  %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
+  %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
+  %int1 = load i32, ptr %gep1
+  %int2 = load i32, ptr %gep2
+  %add = add i32 %int1, %int2
+  store i32 %add, ptr %out
+  ret void
+}
+
+; Escape through a call: %input is passed directly to @escape (its declaration
+; is not in the visible part of this hunk). The opt-side checks expect the
+; argument to be cast to addrspace(101) and converted back to a generic
+; pointer with llvm.nvvm.ptr.param.to.gen before the call, with no alloca of
+; the struct; the llc-side checks expect a cvta.param and no .local ahead of it.
+define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
+; PTX-LABEL: grid_const_escape(
+; PTX:       {
+; PTX-NOT:     .local
+; PTX:         cvta.param.{{.*}}
+; OPT-LABEL: define void @grid_const_escape(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) {
+; OPT-NOT:     alloca [[STRUCT_S]]
+; OPT:         [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT:         [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT:         [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
+;
+  %call = call i32 @escape(ptr %input)
+  ret void
+}
+
+; Two byval arguments escaping through one call: %input and %b are passed to
+; @escape3 together with the address of a local alloca that holds %a. The
+; opt-side checks expect each byval argument to go through an addrspace(101)
+; cast followed by llvm.nvvm.ptr.param.to.gen, and match the middle call
+; operand loosely. The llc-side checks expect a cvta.param for param_0 and
+; param_2, with the converted pointers stored to param0 and param2 of the
+; outgoing call.
+define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
+; PTX-LABEL: multiple_grid_const_escape(
+; PTX:         mov.{{.*}} [[RD1:%.*]], multiple_grid_const_escape_param_0;
+; PTX:         mov.{{.*}} [[RD2:%.*]], multiple_grid_const_escape_param_2;
+; PTX:         mov.{{.*}} [[RD3:%.*]], [[RD2]];
+; PTX:         cvta.param.{{.*}} [[RD4:%.*]], [[RD3]];
+; PTX:         mov.u64 [[RD5:%.*]], [[RD1]];
+; PTX:         cvta.param.{{.*}} [[RD6:%.*]], [[RD5]];
+; PTX:         {
+; PTX:         st.param.b64 [param0+0], [[RD6]];
+; PTX:         st.param.b64 [param2+0], [[RD4]];
+;
+; OPT-LABEL: define void @multiple_grid_const_escape(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) {
+; OPT-NOT:     alloca i32
+; OPT:         [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
+; OPT:         [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
+; OPT-NOT:     alloca [[STRUCT_S]]
+; OPT:         [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT:         [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT:         [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, ptr [[B_PARAM_GEN]])
+;
+  %a.addr = alloca i32, align 4
+  store i32 %a, ptr %a.addr, align 4
+  %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
+  ret void
+}
+
+define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
+; PTX-LABEL: grid_const_memory_escape(
+; PTX-NOT:     .local
+; PTX:         mov.b64 [[RD1:%.*]], grid_const_memory_escape_param_0;
+; PTX:         cvta.param.u64 [[RD3:%.*]], [[RD2:%.*]];
+; PTX:         st.global.u64 [[[RD4:%.*]]], [[RD3]];
+;
+; OPT-LABEL: define void @grid_const_memory_escape(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) {
+; OPT-NOT:     alloca [[STRUCT_S]]
+; OPT:         [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT:         [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT:         store ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, align 8
+;
+  store ptr %input, ptr %addr, align 8
----------------
apaszke wrote:

The grid_constant pointer here does not escape, but it is modified, which is UB, right?

https://github.com/llvm/llvm-project/pull/96125


More information about the llvm-commits mailing list