[polly] r275573 - GPGPU: Use schedule whole components for scheduler
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 15 09:15:48 PDT 2016
Author: grosser
Date: Fri Jul 15 11:15:47 2016
New Revision: 275573
URL: http://llvm.org/viewvc/llvm-project?rev=275573&view=rev
Log:
GPGPU: Use schedule whole components for scheduler
This option increases the scalability of the scheduler and allows us to remove
the 'gisting' workaround we introduced in r275565 to handle a more complicated
test case. Another benefit of using this option is that the generated
code looks a lot more streamlined.
Thanks to Sven Verdoolaege for reminding me of this option.
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
polly/trunk/test/GPGPU/scheduler-timeout.ll
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275573&r1=275572&r2=275573&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Fri Jul 15 11:15:47 2016
@@ -263,15 +263,6 @@ public:
compute_tagger(PPCGScop);
compute_dependences(PPCGScop);
- // Remove domain constraints from flow dependences.
- //
- // The isl scheduler does not terminate even for some smaller cases in case
- // domain constraints remain within these dependences.
- //
- // TODO: Improve the isl scheduler to not handle this case better.
- PPCGScop->dep_flow = isl_union_map_gist_domain(
- PPCGScop->dep_flow, isl_union_set_copy(PPCGScop->domain));
-
return PPCGScop;
}
@@ -581,6 +572,7 @@ public:
// Set scheduling strategy to same strategy PPCG is using.
isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true);
isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true);
+ isl_options_set_schedule_whole_component(PPCGGen->ctx, false);
isl_schedule *Schedule = get_schedule(PPCGGen);
Modified: polly/trunk/test/GPGPU/scheduler-timeout.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/scheduler-timeout.ll?rev=275573&r1=275572&r2=275573&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/scheduler-timeout.ll (original)
+++ polly/trunk/test/GPGPU/scheduler-timeout.ll Fri Jul 15 11:15:47 2016
@@ -30,7 +30,7 @@ target triple = "x86_64-unknown-linux-gn
; CODE:Code
; CODE-NEXT:====
; CODE-NEXT:# host
-; CODE-NEXT:{
+; CODE-NEXT: {
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_alpha, &MemRef_alpha, sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
@@ -38,34 +38,41 @@ target triple = "x86_64-unknown-linux-gn
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_beta, &MemRef_beta, sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_C, MemRef_C, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(128);
+; CODE-NEXT: dim3 k0_dimBlock(16, 32);
+; CODE-NEXT: dim3 k0_dimGrid(128, 128);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
+; CODE: {
+; CODE-NEXT: dim3 k1_dimBlock(16, 32);
+; CODE-NEXT: dim3 k1_dimGrid(128, 128);
+; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> ();
+; CODE-NEXT: cudaCheckKernel();
+; CODE-NEXT: }
+
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_tmp, dev_MemRef_tmp, (4096) * (4096) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_D, dev_MemRef_D, (4096) * (4096) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: }
; CODE: # kernel0
-; CODE-NEXT: for (int c1 = 0; c1 <= 255; c1 += 1)
-; CODE-NEXT: for (int c2 = c1; c2 <= min(255, c1 + 128); c2 += 1)
-; CODE-NEXT: for (int c4 = max(0, -32 * c1 + 32 * c2 - 4095); c4 <= min(31, -32 * c1 + 8190); c4 += 1) {
-; CODE-NEXT: if (c1 == 127 && c2 == 127 && c4 == 31)
-; CODE-NEXT: Stmt_for_body36(32 * b0 + t0, 4095);
-; CODE-NEXT: for (int c5 = max(0, 32 * c1 - 32 * c2 + c4); c5 <= min(min(31, -32 * c2 + 8190), 32 * c1 - 32 * c2 + c4 + 4095); c5 += 1) {
-; CODE-NEXT: if (c2 == c1 && 32 * c1 + c4 <= 4095 && c5 == c4)
-; CODE-NEXT: Stmt_for_body6(32 * b0 + t0, 32 * c1 + c4);
-; CODE-NEXT: if (32 * c1 + c4 <= 4095) {
-; CODE-NEXT: Stmt_for_body11(32 * b0 + t0, 32 * c1 + c4, -32 * c1 + 32 * c2 - c4 + c5);
-; CODE-NEXT: if (c2 == 127 && c5 == 30)
-; CODE-NEXT: Stmt_for_body36(32 * b0 + t0, 32 * c1 + c4);
-; CODE-NEXT: }
-; CODE-NEXT: if (32 * c2 + c5 >= 4095)
-; CODE-NEXT: Stmt_for_body44(32 * b0 + t0, 32 * c1 - 32 * c2 + c4 - c5 + 4095, 32 * c2 + c5 - 4095);
-; CODE-NEXT: }
-; CODE-NEXT: }
+; CODE-NEXT: for (int c2 = 0; c2 <= 127; c2 += 1)
+; CODE-NEXT: for (int c4 = 0; c4 <= 1; c4 += 1) {
+; CODE-NEXT: if (c2 == 0)
+; CODE-NEXT: Stmt_for_body6(32 * b0 + t0, 32 * b1 + t1 + 16 * c4);
+; CODE-NEXT: for (int c5 = 0; c5 <= 31; c5 += 1)
+; CODE-NEXT: Stmt_for_body11(32 * b0 + t0, 32 * b1 + t1 + 16 * c4, 32 * c2 + c5);
+; CODE-NEXT: }
+
+; CODE: # kernel1
+; CODE-NEXT: for (int c2 = 0; c2 <= 127; c2 += 1)
+; CODE-NEXT: for (int c4 = 0; c4 <= 1; c4 += 1) {
+; CODE-NEXT: if (c2 == 0)
+; CODE-NEXT: Stmt_for_body36(32 * b0 + t0, 32 * b1 + t1 + 16 * c4);
+; CODE-NEXT: for (int c5 = 0; c5 <= 31; c5 += 1)
+; CODE-NEXT: Stmt_for_body44(32 * b0 + t0, 32 * b1 + t1 + 16 * c4, 32 * c2 + c5);
+; CODE-NEXT: }
+
; Function Attrs: argmemonly nounwind
More information about the llvm-commits
mailing list