[polly] r311259 - [GPGPU] Correctly initialize array order and fixed_element information

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 19 13:21:22 PDT 2017


Author: grosser
Date: Sat Aug 19 13:21:22 2017
New Revision: 311259

URL: http://llvm.org/viewvc/llvm-project?rev=311259&view=rev
Log:
[GPGPU] Correctly initialize array order and fixed_element information

Summary:
This information is necessary for PPCG to perform correct live-range reordering.
With these changes applied we can live-range reorder some of the important
kernels in COSMO.

We also update and rename one test case, which previously could not be optimized
and now is optimized thanks to live-range reordering. To preserve test coverage
we add a new test case scalar-writes-in-scop-requires-abort.ll, which exercises
our automatic abort in case of scalar writes in the kernel.

Reviewers: Meinersbur, bollu, singam-sanjay

Subscribers: nemanjai, pollydev, llvm-commits, kbarton

Tags: #polly

Differential Revision: https://reviews.llvm.org/D36929

Added:
    polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll
      - copied, changed from r311248, polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
    polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll
Removed:
    polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/lib/External/ppcg/gpu.c
    polly/trunk/lib/External/ppcg/gpu.h
    polly/trunk/test/GPGPU/non-read-only-scalars.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug 19 13:21:22 2017
@@ -2815,6 +2815,9 @@ public:
       Access->ref_id = Acc->getId().release();
       Access->next = Accesses;
       Access->n_index = Acc->getScopArrayInfo()->getNumberOfDimensions();
+      // TODO: Also mark one-element accesses to arrays as fixed-element.
+      Access->fixed_element =
+          Acc->isLatestScalarKind() ? isl_bool_true : isl_bool_false;
       Accesses = Access;
     }
 
@@ -3029,6 +3032,7 @@ public:
       i++;
 
       collect_references(PPCGProg, &PPCGArray);
+      PPCGArray.only_fixed_element = only_fixed_element_accessed(&PPCGArray);
     }
   }
 
@@ -3070,13 +3074,6 @@ public:
     PPCGProg->to_outer = getArrayIdentity();
     // TODO: verify that this assignment is correct.
     PPCGProg->any_to_outer = nullptr;
-
-    // this needs to be set when live range reordering is enabled.
-    // NOTE: I believe that is conservatively correct. I'm not sure
-    //       what the semantics of this is.
-    // Quoting PPCG/gpu.h: "Order dependences on non-scalars."
-    PPCGProg->array_order =
-        isl_union_map_empty(isl_set_get_space(PPCGScop->context));
     PPCGProg->n_stmts = std::distance(S->begin(), S->end());
     PPCGProg->stmts = getStatements();
 
@@ -3099,6 +3096,9 @@ public:
 
     createArrays(PPCGProg, ValidSAIs);
 
+    PPCGProg->array_order = nullptr;
+    collect_order_dependences(PPCGProg);
+
     PPCGProg->may_persist = compute_may_persist(PPCGProg);
     return PPCGProg;
   }

Modified: polly/trunk/lib/External/ppcg/gpu.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.c?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.c (original)
+++ polly/trunk/lib/External/ppcg/gpu.c Sat Aug 19 13:21:22 2017
@@ -162,7 +162,7 @@ static int is_read_only_scalar(struct gp
 /* Is "array" only accessed as individual, fixed elements?
  * That is, does each access to "array" access a single, fixed element?
  */
-static isl_bool only_fixed_element_accessed(struct gpu_array_info *array)
+isl_bool only_fixed_element_accessed(struct gpu_array_info *array)
 {
 	int i;
 
@@ -250,6 +250,9 @@ static int extract_array_info(struct gpu
 static __isl_give isl_union_map *remove_independences(struct gpu_prog *prog,
 	struct gpu_array_info *array, __isl_take isl_union_map *order)
 {
+	// We do not have independence information in Polly. Hence, make this
+	// function a no-op.
+	return order;
 	int i;
 
 	for (i = 0; i < prog->scop->pet->n_independence; ++i) {

Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Sat Aug 19 13:21:22 2017
@@ -454,4 +454,6 @@ __isl_give isl_ast_node *generate_code(s
 
 __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
 void collect_references(struct gpu_prog *prog, struct gpu_array_info *array);
+void collect_order_dependences(struct gpu_prog *prog);
+isl_bool only_fixed_element_accessed(struct gpu_array_info *array);
 #endif

Removed: polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll?rev=311258&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll (original)
+++ polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll (removed)
@@ -1,84 +0,0 @@
-; RUN: opt %loadPolly -analyze -polly-use-llvm-names -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP:      Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth:  1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT:         ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT:             [tmp, tmp1] -> { Stmt_if_end[i0] -> MemRef_end[0] };
-; SCOP-NEXT:         Execution Context: [tmp, tmp1] -> {  :  }
-; SCOP-NEXT:         ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT:             [tmp, tmp1] -> { Stmt_for_body[i0] -> MemRef_control[0] };
-; SCOP-NEXT:         Execution Context: [tmp, tmp1] -> {  : tmp > 0 }
-; SCOP-NEXT: }
-
-; Check that we generate a correct "always false" branch.
-; HOST-IR:  br i1 false, label %polly.start, label %entry.split.pre_entry_bb
-
-; This test case checks that we generate correct code if PPCGCodeGeneration
-; decides a build is unsuccessful with invariant load hoisting enabled.
-;
-; There is a conditional branch which switches between the original code and
-; the new code. We try to set this conditional branch to branch on false.
-; However, invariant load hoisting changes the structure of the scop, so we
-; need to change the way we *locate* this instruction.
-;
-;    void f(const int *end, int *arr, const int *control, const int *readarr) {
-;      for (int i = 0; i < *end; i++) {
-;        int t = 0;
-;        if (*control > 3) {
-;          t += readarr[i];
-;        }
-;        arr[i] = t;
-;      }
-;    }
-;
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.12.0"
-
-define void @f(i32* %end, i32* %arr, i32* %control, i32* %readarr) {
-entry:
-  br label %entry.split
-
-entry.split:                                      ; preds = %entry
-  %tmp3 = load i32, i32* %end, align 4
-  %cmp4 = icmp sgt i32 %tmp3, 0
-  br i1 %cmp4, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph:                                   ; preds = %entry.split
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.lr.ph, %if.end
-  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
-  %tmp1 = load i32, i32* %control, align 4
-  %cmp1 = icmp sgt i32 %tmp1, 3
-  br i1 %cmp1, label %if.then, label %if.end
-
-if.then:                                          ; preds = %for.body
-  %arrayidx = getelementptr inbounds i32, i32* %readarr, i32 %i.05
-  %tmp2 = load i32, i32* %arrayidx, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %for.body
-  %t.0 = phi i32 [ %tmp2, %if.then ], [ 0, %for.body ]
-  %arrayidx2 = getelementptr inbounds i32, i32* %arr, i32 %i.05
-  store i32 %t.0, i32* %arrayidx2, align 4
-  %inc = add nuw nsw i32 %i.05, 1
-  %tmp = load i32, i32* %end, align 4
-  %cmp = icmp slt i32 %inc, %tmp
-  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge:                       ; preds = %if.end
-  br label %for.end
-
-for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry.split
-  ret void
-}
-

Copied: polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll (from r311248, polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll)
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll?p2=polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll&p1=polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll&r1=311248&r2=311259&rev=311259&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll (original)
+++ polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll Sat Aug 19 13:21:22 2017
@@ -1,34 +1,15 @@
-; RUN: opt %loadPolly -analyze -polly-use-llvm-names -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
+  ; RUN: opt %loadPolly -polly-use-llvm-names -polly-scops \
+; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-code -disable-output \
+; RUN:   < %s | FileCheck %s -check-prefix=CODE
+
+; RUN: opt %loadPolly -polly-use-llvm-names -polly-scops \
+; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-kernel-ir -disable-output \
+; RUN:   < %s | FileCheck %s -check-prefix=KERNELIR
 
 ; REQUIRES: pollyacc
 
-; SCOP:      Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth:  1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT:         ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT:             [tmp, tmp1] -> { Stmt_if_end[i0] -> MemRef_end[0] };
-; SCOP-NEXT:         Execution Context: [tmp, tmp1] -> {  :  }
-; SCOP-NEXT:         ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT:             [tmp, tmp1] -> { Stmt_for_body[i0] -> MemRef_control[0] };
-; SCOP-NEXT:         Execution Context: [tmp, tmp1] -> {  : tmp > 0 }
-; SCOP-NEXT: }
-
-; Check that we generate a correct "always false" branch.
-; HOST-IR:  br i1 false, label %polly.start, label %entry.split.pre_entry_bb
-
-; This test case checks that we generate correct code if PPCGCodeGeneration
-; decides a build is unsuccessful with invariant load hoisting enabled.
-;
-; There is a conditional branch which switches between the original code and
-; the new code. We try to set this conditional branch to branch on false.
-; However, invariant load hoisting changes the structure of the scop, so we
-; need to change the way we *locate* this instruction.
-;
 ;    void f(const int *end, int *arr, const int *control, const int *readarr) {
 ;      for (int i = 0; i < *end; i++) {
 ;        int t = 0;
@@ -38,7 +19,20 @@
 ;        arr[i] = t;
 ;      }
 ;    }
-;
+
+; This test case tests the ability to infer that `t` is local to each loop
+; iteration, and can therefore be privatized.
+
+; CODE: # kernel0
+; CODE-NEXT: for (int c0 = 0; c0 <= (tmp - 32 * b0 - 1) / 1048576; c0 += 1)
+; CODE-NEXT:   if (tmp >= 32 * b0 + t0 + 1048576 * c0 + 1) {
+; CODE-NEXT:     Stmt_for_body(32 * b0 + t0 + 1048576 * c0);
+; CODE-NEXT:     if (tmp1 >= 4)
+; CODE-NEXT:       Stmt_if_then(32 * b0 + t0 + 1048576 * c0);
+; CODE-NEXT:     Stmt_if_end(32 * b0 + t0 + 1048576 * c0);
+; CODE-NEXT:   }
+
+; KERNELIR: %private_array = alloca i32
 
 target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.12.0"

Modified: polly/trunk/test/GPGPU/non-read-only-scalars.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/non-read-only-scalars.ll?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/non-read-only-scalars.ll (original)
+++ polly/trunk/test/GPGPU/non-read-only-scalars.ll Sat Aug 19 13:21:22 2017
@@ -68,11 +68,16 @@
 ; CODE-NEXT: Stmt_bb17();
 
 ; CODE: # kernel2
-; CODE-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) {
-; CODE-NEXT:   Stmt_bb18(c0);
-; CODE-NEXT:   if (c0 <= 31)
-; CODE-NEXT:     Stmt_bb20(c0);
-; CODE-NEXT: }
+; CODE-NEXT: {
+; CODE-NEXT:   read();
+; CODE-NEXT:   for (int c0 = 0; c0 <= 32; c0 += 1) {
+; CODE-NEXT:     Stmt_bb18(c0);
+; CODE-NEXT:     if (c0 <= 31)
+; CODE-NEXT:       Stmt_bb20(c0);
+; CODE-NEXT:   }
+; CODE-NEXT:   write();
+; CODE-NEXT: }
+
 
 ; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_sum_0__phi)
 ; KERNEL-IR:  store float 0.000000e+00, float* %sum.0.phiops

Added: polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll?rev=311259&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll (added)
+++ polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll Sat Aug 19 13:21:22 2017
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-scops  \
+; RUN: -polly-acc-dump-code -analyze \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
+
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-code \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=CODE
+
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
+
+; REQUIRES: pollyacc
+
+; SCOP:      Invariant Accesses: {
+; SCOP-NEXT:         ReadAccess :=	[Reduction Type: NONE] [Scalar: 0]
+; SCOP-NEXT:             { Stmt_loop[i0] -> MemRef_p[0] };
+; SCOP-NEXT:         Execution Context: {  :  }
+; SCOP-NEXT: }
+
+; CODE: # kernel0
+; CODE-NEXT: {
+; CODE-NEXT:   if (32 * b0 + t0 <= 1025) {
+; CODE-NEXT:     Stmt_loop(32 * b0 + t0);
+; CODE-NEXT:     write(0);
+; CODE-NEXT:   }
+; CODE-NEXT:   sync0();
+; CODE-NEXT: }
+
+; Check that we generate a correct "always false" branch.
+; HOST-IR:  br i1 false, label %polly.start, label %loop.pre_entry_bb
+
+; This test case checks that we generate correct code if PPCGCodeGeneration
+; decides a build is unsuccessful with invariant load hoisting enabled.
+;
+; There is a conditional branch which switches between the original code and
+; the new code. We try to set this conditional branch to branch on false.
+; However, invariant load hoisting changes the structure of the scop, so we
+; need to change the way we *locate* this instruction.
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.12.0"
+
+define void @foo(float* %A, float* %p) {
+entry:
+  br label %loop
+
+loop:
+  %indvar = phi i64 [0, %entry], [%indvar.next, %loop]
+  %indvar.next = add i64 %indvar, 1
+  %invariant = load float, float* %p
+  %ptr = getelementptr float, float* %A, i64 %indvar
+  store float 42.0, float* %ptr
+  %cmp = icmp sle i64 %indvar, 1024
+  br i1 %cmp, label %loop, label %loop2
+
+loop2:
+  %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2]
+  %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2]
+  %indvar2.next = add i64 %indvar2, 1
+  store float %indvar2f, float* %A
+  %cmp2 = icmp sle i64 %indvar2, 1024
+  br i1 %cmp2, label %loop2, label %end
+
+end:
+  ret void
+}




More information about the llvm-commits mailing list