[polly] r311268 - Revert "[GPGPU] Simplify PPCGSCop to reduce compile time [NFC]"
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 19 16:49:27 PDT 2017
Author: grosser
Date: Sat Aug 19 16:49:26 2017
New Revision: 311268
URL: http://llvm.org/viewvc/llvm-project?rev=311268&view=rev
Log:
Revert "[GPGPU] Simplify PPCGSCop to reduce compile time [NFC]"
We still see some issues with parameter space mismatches. Revert this to get
a clean baseline. We will recommit after these issues have been resolved.
This reverts commit 0e360a14194f722ded7aa2bc9d4be2ed2efeeb49.
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
polly/trunk/test/GPGPU/non-zero-array-offset.ll
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=311268&r1=311267&r2=311268&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug 19 16:49:26 2017
@@ -2658,77 +2658,6 @@ public:
return Names;
}
- /// Remove unreferenced parameter dimensions from union_map.
- isl::union_map removeUnusedParameters(isl::union_map UMap) {
- auto New = isl::union_map::empty(isl::space(UMap.get_ctx(), 0, 0));
-
- auto RemoveUnusedDims = [&New](isl::map S) -> isl::stat {
- int Removed = 0;
- int NumDims = S.dim(isl::dim::param);
- for (long i = 0; i < NumDims; i++) {
- const int Dim = i - Removed;
- if (!S.involves_dims(isl::dim::param, Dim, 1)) {
- S = S.remove_dims(isl::dim::param, Dim, 1);
- Removed++;
- }
- }
- New = New.unite(S);
- return isl::stat::ok;
- };
-
- UMap.foreach_map(RemoveUnusedDims);
- return New;
- }
-
- /// Remove unreferenced parameter dimensions from union_set.
- isl::union_set removeUnusedParameters(isl::union_set USet) {
- auto New = isl::union_set::empty(isl::space(USet.get_ctx(), 0, 0));
-
- auto RemoveUnusedDims = [&New](isl::set S) -> isl::stat {
- int Removed = 0;
- int NumDims = S.dim(isl::dim::param);
- for (long i = 0; i < NumDims; i++) {
- const int Dim = i - Removed;
- if (!S.involves_dims(isl::dim::param, Dim, 1)) {
- S = S.remove_dims(isl::dim::param, Dim, 1);
- Removed++;
- }
- }
- New = New.unite(S);
- return isl::stat::ok;
- };
-
- USet.foreach_set(RemoveUnusedDims);
- return New;
- }
-
- /// Simplify PPCG scop to improve compile time.
- ///
- /// We drop unused parameter dimensions to reduce the size of the sets we are
- /// working with. Especially the computed dependences tend to accumulate a lot
- /// of parameters that are present in the input memory accesses, but often are
- /// not necessary to express the actual dependences. As isl represents maps
- /// and sets with dense matrices, reducing the dimensionality of isl sets
- /// commonly reduces code generation performance.
- void simplifyPPCGScop(ppcg_scop *PPCGScop) {
- PPCGScop->domain =
- removeUnusedParameters(isl::manage(PPCGScop->domain)).release();
-
- PPCGScop->dep_forced =
- removeUnusedParameters(isl::manage(PPCGScop->dep_forced)).release();
- PPCGScop->dep_false =
- removeUnusedParameters(isl::manage(PPCGScop->dep_false)).release();
- PPCGScop->dep_flow =
- removeUnusedParameters(isl::manage(PPCGScop->dep_flow)).release();
- PPCGScop->tagged_dep_flow =
- removeUnusedParameters(isl::manage(PPCGScop->tagged_dep_flow))
- .release();
-
- PPCGScop->tagged_dep_order =
- removeUnusedParameters(isl::manage(PPCGScop->tagged_dep_order))
- .release();
- }
-
/// Create a new PPCG scop from the current scop.
///
/// The PPCG scop is initialized with data from the current polly::Scop. From
@@ -2786,7 +2715,6 @@ public:
compute_tagger(PPCGScop);
compute_dependences(PPCGScop);
eliminate_dead_code(PPCGScop);
- simplifyPPCGScop(PPCGScop);
return PPCGScop;
}
@@ -3228,15 +3156,11 @@ public:
isl_schedule *Schedule = get_schedule(PPCGGen);
- /// Copy to and from device functions may introduce new parameters, which
- /// must be present in the schedule tree root for code generation. Hence,
- /// we ensure that all possible parameters are introduced from this point.
- if (!PollyManagedMemory)
- Schedule =
- isl_schedule_align_params(Schedule, S->getFullParamSpace().release());
-
int has_permutable = has_any_permutable_node(Schedule);
+ Schedule =
+ isl_schedule_align_params(Schedule, S->getFullParamSpace().release());
+
if (!has_permutable || has_permutable < 0) {
Schedule = isl_schedule_free(Schedule);
DEBUG(dbgs() << getUniqueScopName(S)
Modified: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll?rev=311268&r1=311267&r2=311268&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll (original)
+++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll Sat Aug 19 16:49:26 2017
@@ -21,7 +21,7 @@
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A)
+; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -36,7 +36,7 @@
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_B)
+; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -47,13 +47,13 @@
; KERNEL-NEXT: }
-; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
+; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
; IR-NEXT: [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_0_params, i64 0, i64 0
; IR-NEXT: store i8* [[DEVPTR]], i8** %polly_launch_0_param_0
; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_0_param_0 to i8*
; IR-NEXT: store i8* [[DATA]], i8** [[SLOT]]
-; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
+; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
; IR-NEXT: [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_1_params, i64 0, i64 0
; IR-NEXT: store i8* [[DEVPTR]], i8** %polly_launch_1_param_0
; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8*
Modified: polly/trunk/test/GPGPU/non-zero-array-offset.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/non-zero-array-offset.ll?rev=311268&r1=311267&r2=311268&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/non-zero-array-offset.ll (original)
+++ polly/trunk/test/GPGPU/non-zero-array-offset.ll Sat Aug 19 16:49:26 2017
@@ -12,14 +12,14 @@
; CODE: dim3 k0_dimBlock(8);
; CODE-NEXT: dim3 k0_dimGrid(1);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: {
; CODE-NEXT: dim3 k1_dimBlock(8);
; CODE-NEXT: dim3 k1_dimGrid(1);
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A);
+; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_B);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
@@ -27,10 +27,10 @@
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (8) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE: # kernel0
-; CODE-NEXT: Stmt_bb3(t0);
+; CODE-NEXT: Stmt_bb11(t0);
; CODE: # kernel1
-; CODE-NEXT: Stmt_bb11(t0);
+; CODE-NEXT: Stmt_bb3(t0);
; IR: %p_dev_array_MemRef_B = call i8* @polly_allocateMemoryForDevice(i64 32)
; IR-NEXT: %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 32)
More information about the llvm-commits
mailing list