[Polly] Isl AST reduction pragmas [V2]

Tobias Grosser tobias at grosser.es
Fri Jun 20 01:25:49 PDT 2014


On 19/06/2014 20:47, Johannes Doerfert wrote:
> On 19/06/2014 18:56, Johannes Doerfert wrote:
>> So if I have the dependences and schedule like shown below (assume they are e.g., WAW dependences).
>> Would a function like "astScheduleDimIsParallel" in IslAst tell me it is parallel or not (without my patch and reductions)?
>>
>> I was under the impression that it would (and should) say __not__ parallel.
>
>>> It would say 'parallel', as there are no loop carried dependences between the statements that are part of the loop. Or does the code suggest otherwise?
> There is a loop carried dependency.
>
> Looking at the C versions (before & after) we clearly see the assignments to the same memory location.
> Looking at the dependences we see a dependency between one iteration and every following one (even if transitive dependences are only implicitly represented).
> Looking at the "parallelism tests" (e.g., the one in astScheduleDimIsParallel [btw. there are also others which perform the same/a similar function... we should not duplicate such important functions!]),
>    we see that the original dependences S[i]->S[i+1] are transformed according to the schedule and then we look if the dependency is carried by an outer loop or not carried by the current dimension.
>
> My point is that in all these cases I see the forward dependency from one iteration to the next of the same loop, and one between the loops.
>
> I simply do not understand where we get parallelism here, other than from the "excluded" (see above) reductions. Even with reductions the test (to my understanding) will yield the correct result (which is now "reduction parallel", not "not parallel" any more).

I attached a test case derived from this C file.

/*
 * Test kernel: two consecutive loops, each running i over [0, 2*n).
 *
 *   Loop 1: every iteration reads and writes the single cell A[0].
 *   Loop 2: iteration i writes only A[i + 1]; since i >= 0 this never
 *           touches A[0], and no two iterations write the same cell.
 */
void foo(float *A, long n) {
	/* Loop 1: all iterations update the same location, A[0]. */
	for (long i = 0; i < 2*n; i++)
	  A[0] += i;

	/* Loop 2: each iteration stores to its own element, A[1] .. A[2*n]. */
	for (long i = 0; i < 2*n; i++)
           A[i + 1] = 1;
}

$ polly-opt -polly-ast -analyze test.preopt.ll -disable-polly-legality -polly-ast-detect-parallel

if (1)

     {
       for (int c1 = 0; c1 < 2 * n; c1 += 1)
         Stmt_for_body(c1);
       #pragma simd
       #pragma omp parallel for
       for (int c1 = 0; c1 < 2 * n; c1 += 1)
         Stmt_for_body6(c1);
     }

else
     {  /* original code */ }


We can import the following schedule (see attached jscop file):


"[n] -> { Stmt_for_body[i0] -> scattering[0, i0, 0]: i0 % 2 = 0;
           Stmt_for_body[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }"
"[n] -> { Stmt_for_body6[i0] -> scattering[1, i0, 0] }"


$ polly-opt -polly-import-jscop -polly-ast -analyze test.preopt.ll -disable-polly-legality -polly-ast-detect-parallel


     for (int c0 = 0; c0 <= 2; c0 += 1) {
       if (c0 == 2) {
         #pragma simd
         #pragma omp parallel for
         for (int c1 = 1; c1 < 2 * n; c1 += 2)
           Stmt_for_body(c1);
       } else if (c0 == 1) {
         #pragma simd
         #pragma omp parallel for
         for (int c1 = 0; c1 < 2 * n; c1 += 1)
           Stmt_for_body6(c1);
       } else
         #pragma simd
         #pragma omp parallel for
         for (int c1 = 0; c1 < 2 * n - 1; c1 += 2)
           Stmt_for_body(c1);
     }

Cheers,
Tobias
-------------- next part --------------
; ModuleID = 'test.ll'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nounwind uwtable
; @foo(%A, %n): two loops in sequence.
;   for.body  - repeatedly loads the float at %A, adds the induction value
;               (converted to float) and stores the sum back to %A.
;   for.body6 - stores 1.0 to %A[i + 1] for each induction value i.
; Both loops are entered only when %n > 0 and run until their counter
; reaches max(2 * %n, 1).
; Note: the textual order of the basic blocks below is not the execution
; order; follow the branch targets.
define void @foo(float* %A, i64 %n) #0 {
entry:
  br label %entry.split

; Guard for the first loop: skip it entirely unless %n > 0.
entry.split:                                      ; preds = %entry
  %cmp3 = icmp sgt i64 %n, 0
  br i1 %cmp3, label %for.body.lr.ph, label %for.cond2.preheader

; Preheader of the first loop: %smax5 = signed max(2 * %n, 1) is the value
; the loop counter is compared against on exit.
for.body.lr.ph:                                   ; preds = %entry.split
  %0 = mul i64 %n, 2
  %1 = icmp sgt i64 %0, 1
  %smax5 = select i1 %1, i64 %0, i64 1
  br label %for.body

; Exit edge of the first loop; falls through to the second loop's guard.
for.cond.for.cond2.preheader_crit_edge:           ; preds = %for.body
  br label %for.cond2.preheader

; Guard for the second loop: same %n > 0 test as in entry.split.
for.cond2.preheader:                              ; preds = %for.cond.for.cond2.preheader_crit_edge, %entry.split
  %cmp41 = icmp sgt i64 %n, 0
  br i1 %cmp41, label %for.body6.lr.ph, label %for.end11

; Preheader of the second loop: %smax = signed max(2 * %n, 1), recomputed
; independently of %smax5.
for.body6.lr.ph:                                  ; preds = %for.cond2.preheader
  %2 = mul i64 %n, 2
  %3 = icmp sgt i64 %2, 1
  %smax = select i1 %3, i64 %2, i64 1
  br label %for.body6

; First loop body: %4 is the induction variable (starts at 0, step 1).
; Each iteration does load %A -> fadd with float(%4) -> store %A, so every
; iteration reads and writes the same address.
for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %4 = phi i64 [ 0, %for.body.lr.ph ], [ %6, %for.body ]
  %conv = sitofp i64 %4 to float
  %5 = load float* %A, align 4
  %add = fadd float %conv, %5
  store float %add, float* %A, align 4
  %6 = add nsw i64 %4, 1
  %exitcond6 = icmp ne i64 %6, %smax5
  br i1 %exitcond6, label %for.body, label %for.cond.for.cond2.preheader_crit_edge

; Second loop body: %i1.02 is the induction variable (starts at 0, step 1).
; Each iteration stores 1.0 to element %i1.02 + 1 of %A. %inc10 (the store
; index) and %7 (the next counter value) both compute %i1.02 + 1; only %7
; carries the nsw flag.
for.body6:                                        ; preds = %for.body6.lr.ph, %for.body6
  %i1.02 = phi i64 [ 0, %for.body6.lr.ph ], [ %7, %for.body6 ]
  %inc10 = add i64 %i1.02, 1
  %arrayidx8 = getelementptr float* %A, i64 %inc10
  store float 1.000000e+00, float* %arrayidx8, align 4
  %7 = add nsw i64 %i1.02, 1
  %exitcond = icmp ne i64 %7, %smax
  br i1 %exitcond, label %for.body6, label %for.cond2.for.end11_crit_edge

; Exit edge of the second loop.
for.cond2.for.end11_crit_edge:                    ; preds = %for.body6
  br label %for.end11

; Common function exit, reached after the second loop or when it is skipped.
for.end11:                                        ; preds = %for.cond2.for.end11_crit_edge, %for.cond2.preheader
  ret void
}

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.ident = !{!0}

!0 = metadata !{metadata !"clang version 3.5.0 "}
-------------- next part --------------
{
   "context" : "[n] -> {  : n >= -9223372036854775808 and n <= 9223372036854775807 }",
   "name" : "entry.split => for.end11",
   "statements" : [
      {
         "accesses" : [
            {
               "kind" : "read",
               "relation" : "[n] -> { Stmt_for_body[i0] -> MemRef_A[0] }"
            },
            {
               "kind" : "write",
               "relation" : "[n] -> { Stmt_for_body[i0] -> MemRef_A[0] }"
            }
         ],
         "domain" : "[n] -> { Stmt_for_body[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
         "name" : "Stmt_for_body",
         "schedule" : "[n] -> { Stmt_for_body[i0] -> scattering[0, i0, 0]: i0 % 2 = 0; Stmt_for_body[i0] -> scattering[2, i0, 0]: i0 % 2 = 1 }"
      },
      {
         "accesses" : [
            {
               "kind" : "write",
               "relation" : "[n] -> { Stmt_for_body6[i0] -> MemRef_A[1 + i0] }"
            }
         ],
         "domain" : "[n] -> { Stmt_for_body6[i0] : i0 >= 0 and i0 <= -1 + 2n and n >= 1 }",
         "name" : "Stmt_for_body6",
         "schedule" : "[n] -> { Stmt_for_body6[i0] -> scattering[1, i0, 0] }"
      }
   ]
}


More information about the llvm-commits mailing list