[llvm] [NVPTX] Basic support for "grid_constant" (PR #96125)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 20 07:26:03 PDT 2024
================
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes OPT
+; RUN: llc < %s -mcpu=sm_70 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX
+
+; Test for a scalar byval parameter: loads an i32 through %input1, adds
+; %input2, and stores the sum to %out.
+; Opt-side checks: no alloca is emitted; %input1 is cast to addrspace(101)
+; and the load reads through that cast.
+; Llc-side checks: an ld.param from grid_const_int_param_0, with no ld.u32
+; between the function label and that load.
+; NOTE(review): the grid_constant annotation itself is not visible in this
+; excerpt -- presumably attached elsewhere in the file; confirm.
+define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) {
+; PTX-LABEL: grid_const_int(
+; PTX-NOT: ld.u32
+; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_int_param_0];
+;
+; OPT-LABEL: define void @grid_const_int(
+; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) {
+; OPT-NOT: alloca
+; OPT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
+; OPT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
+;
+ %tmp = load i32, ptr %input1, align 4
+ %add = add i32 %tmp, %input2
+ store i32 %add, ptr %out
+ ret void
+}
+
+; Two-i32 aggregate used as the byval pointee type by the struct-based tests
+; in this file.
+%struct.s = type { i32, i32 }
+
+; Test for a struct byval parameter that is only read: loads both i32 fields
+; of %input via GEPs, adds them, and stores the sum to %out.
+; Opt-side checks: no alloca is emitted; %input is cast to addrspace(101) and
+; both GEPs and both loads operate on addrspace(101) pointers.
+; Llc-side checks: two ld.param loads, from grid_const_struct_param_0 and
+; grid_const_struct_param_0+4, with no ld.u32 before the first of them.
+define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
+; PTX-LABEL: grid_const_struct(
+; PTX: {
+; PTX-NOT: ld.u32
+; PTX: ld.param.{{.*}} [[R1:%.*]], [grid_const_struct_param_0];
+; PTX: ld.param.{{.*}} [[R2:%.*]], [grid_const_struct_param_0+4];
+;
+; OPT-LABEL: define void @grid_const_struct(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) {
+; OPT-NOT: alloca
+; OPT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
+; OPT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
+; OPT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
+; OPT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
+;
+ %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
+ %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
+ %int1 = load i32, ptr %gep1
+ %int2 = load i32, ptr %gep2
+ %add = add i32 %int1, %int2
+ store i32 %add, ptr %out
+ ret void
+}
+
+; Test for a byval pointer that escapes through a call: %input is passed
+; directly to @escape.
+; Opt-side checks: no alloca of the struct type precedes the cast; %input is
+; cast to addrspace(101), converted to a generic pointer with
+; llvm.nvvm.ptr.param.to.gen, and that generic pointer is the call argument.
+; Llc-side checks: a cvta.param is emitted, with no ".local" between the
+; opening brace and it.
+define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
+; PTX-LABEL: grid_const_escape(
+; PTX: {
+; PTX-NOT: .local
+; PTX: cvta.param.{{.*}}
+; OPT-LABEL: define void @grid_const_escape(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) {
+; OPT-NOT: alloca [[STRUCT_S]]
+; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
+;
+ %call = call i32 @escape(ptr %input)
+ ret void
+}
+
+; Test with two escaping byval pointers and one ordinary local: %a is spilled
+; to a local alloca, then %input, the alloca's address, and %b are all passed
+; to @escape3.
+; Opt-side checks: %b and %input are each cast to addrspace(101) and converted
+; with llvm.nvvm.ptr.param.to.gen; no "alloca i32" is expected ahead of the
+; cast of %b and no alloca of the struct type ahead of the cast of %input.
+; The call receives the two converted pointers; the middle argument is
+; matched with a wildcard.
+; Llc-side checks: the addresses of param_0 and param_2 are moved into
+; registers, converted with cvta.param, and stored to the outgoing call
+; slots param0 and param2.
+define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
+; PTX-LABEL: multiple_grid_const_escape(
+; PTX: mov.{{.*}} [[RD1:%.*]], multiple_grid_const_escape_param_0;
+; PTX: mov.{{.*}} [[RD2:%.*]], multiple_grid_const_escape_param_2;
+; PTX: mov.{{.*}} [[RD3:%.*]], [[RD2]];
+; PTX: cvta.param.{{.*}} [[RD4:%.*]], [[RD3]];
+; PTX: mov.u64 [[RD5:%.*]], [[RD1]];
+; PTX: cvta.param.{{.*}} [[RD6:%.*]], [[RD5]];
+; PTX: {
+; PTX: st.param.b64 [param0+0], [[RD6]];
+; PTX: st.param.b64 [param2+0], [[RD4]];
+;
+; OPT-LABEL: define void @multiple_grid_const_escape(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) {
+; OPT-NOT: alloca i32
+; OPT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
+; OPT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
+; OPT-NOT: alloca [[STRUCT_S]]
+; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, ptr [[B_PARAM_GEN]])
+;
+ %a.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4
+ %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
+ ret void
+}
+
+define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) {
+; PTX-LABEL: grid_const_memory_escape(
+; PTX-NOT: .local
+; PTX: mov.b64 [[RD1:%.*]], grid_const_memory_escape_param_0;
+; PTX: cvta.param.u64 [[RD3:%.*]], [[RD2:%.*]];
+; PTX: st.global.u64 [[[RD4:%.*]]], [[RD3]];
+;
+; OPT-LABEL: define void @grid_const_memory_escape(
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) {
+; OPT-NOT: alloca [[STRUCT_S]]
+; OPT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT: store ptr [[INPUT_PARAM_GEN]], ptr {{.*}}, align 8
+;
+ store ptr %input, ptr %addr, align 8
----------------
harinvidia wrote:
The pointer "input", which is annotated with grid_constant, is written to the memory pointed to by "addr", which is in global memory, so the pointer escapes. We don't modify the contents of the grid_constant memory pointed to by "input" here.
https://github.com/llvm/llvm-project/pull/96125
More information about the llvm-commits
mailing list