[polly] r311259 - [GPGPU] Correctly initialize array order and fixed_element information
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 19 13:21:22 PDT 2017
Author: grosser
Date: Sat Aug 19 13:21:22 2017
New Revision: 311259
URL: http://llvm.org/viewvc/llvm-project?rev=311259&view=rev
Log:
[GPGPU] Correctly initialize array order and fixed_element information
Summary:
This information is necessary for PPCG to perform correct live-range reordering.
With these changes applied we can live-range reorder some of the important
kernels in COSMO.
We also update and rename one test case, which previously could not be optimized
and now is optimized thanks to live-range reordering. To preserve test coverage
we add a new test case scalar-writes-in-scop-requires-abort.ll, which exercises
our automatic abort in case of scalar writes in the kernel.
Reviewers: Meinersbur, bollu, singam-sanjay
Subscribers: nemanjai, pollydev, llvm-commits, kbarton
Tags: #polly
Differential Revision: https://reviews.llvm.org/D36929
Added:
polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll
- copied, changed from r311248, polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll
Removed:
polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
polly/trunk/lib/External/ppcg/gpu.c
polly/trunk/lib/External/ppcg/gpu.h
polly/trunk/test/GPGPU/non-read-only-scalars.ll
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug 19 13:21:22 2017
@@ -2815,6 +2815,9 @@ public:
Access->ref_id = Acc->getId().release();
Access->next = Accesses;
Access->n_index = Acc->getScopArrayInfo()->getNumberOfDimensions();
+ // TODO: Also mark one-element accesses to arrays as fixed-element.
+ Access->fixed_element =
+ Acc->isLatestScalarKind() ? isl_bool_true : isl_bool_false;
Accesses = Access;
}
@@ -3029,6 +3032,7 @@ public:
i++;
collect_references(PPCGProg, &PPCGArray);
+ PPCGArray.only_fixed_element = only_fixed_element_accessed(&PPCGArray);
}
}
@@ -3070,13 +3074,6 @@ public:
PPCGProg->to_outer = getArrayIdentity();
// TODO: verify that this assignment is correct.
PPCGProg->any_to_outer = nullptr;
-
- // this needs to be set when live range reordering is enabled.
- // NOTE: I believe that is conservatively correct. I'm not sure
- // what the semantics of this is.
- // Quoting PPCG/gpu.h: "Order dependences on non-scalars."
- PPCGProg->array_order =
- isl_union_map_empty(isl_set_get_space(PPCGScop->context));
PPCGProg->n_stmts = std::distance(S->begin(), S->end());
PPCGProg->stmts = getStatements();
@@ -3099,6 +3096,9 @@ public:
createArrays(PPCGProg, ValidSAIs);
+ PPCGProg->array_order = nullptr;
+ collect_order_dependences(PPCGProg);
+
PPCGProg->may_persist = compute_may_persist(PPCGProg);
return PPCGProg;
}
Modified: polly/trunk/lib/External/ppcg/gpu.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.c?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.c (original)
+++ polly/trunk/lib/External/ppcg/gpu.c Sat Aug 19 13:21:22 2017
@@ -162,7 +162,7 @@ static int is_read_only_scalar(struct gp
/* Is "array" only accessed as individual, fixed elements?
* That is, does each access to "array" access a single, fixed element?
*/
-static isl_bool only_fixed_element_accessed(struct gpu_array_info *array)
+isl_bool only_fixed_element_accessed(struct gpu_array_info *array)
{
int i;
@@ -250,6 +250,9 @@ static int extract_array_info(struct gpu
static __isl_give isl_union_map *remove_independences(struct gpu_prog *prog,
struct gpu_array_info *array, __isl_take isl_union_map *order)
{
+ // We do not have independence information in Polly. Hence, make this
+ // function a no-op.
+ return order;
int i;
for (i = 0; i < prog->scop->pet->n_independence; ++i) {
Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Sat Aug 19 13:21:22 2017
@@ -454,4 +454,6 @@ __isl_give isl_ast_node *generate_code(s
__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
void collect_references(struct gpu_prog *prog, struct gpu_array_info *array);
+void collect_order_dependences(struct gpu_prog *prog);
+isl_bool only_fixed_element_accessed(struct gpu_array_info *array);
#endif
Removed: polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll?rev=311258&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll (original)
+++ polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll (removed)
@@ -1,84 +0,0 @@
-; RUN: opt %loadPolly -analyze -polly-use-llvm-names -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
-
-; REQUIRES: pollyacc
-
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp, tmp1] -> { Stmt_if_end[i0] -> MemRef_end[0] };
-; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : }
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp, tmp1] -> { Stmt_for_body[i0] -> MemRef_control[0] };
-; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : tmp > 0 }
-; SCOP-NEXT: }
-
-; Check that we generate a correct "always false" branch.
-; HOST-IR: br i1 false, label %polly.start, label %entry.split.pre_entry_bb
-
-; This test case checks that we generate correct code if PPCGCodeGeneration
-; decides a build is unsuccessful with invariant load hoisting enabled.
-;
-; There is a conditional branch which switches between the original code and
-; the new code. We try to set this conditional branch to branch on false.
-; However, invariant load hoisting changes the structure of the scop, so we
-; need to change the way we *locate* this instruction.
-;
-; void f(const int *end, int *arr, const int *control, const int *readarr) {
-; for (int i = 0; i < *end; i++) {
-; int t = 0;
-; if (*control > 3) {
-; t += readarr[i];
-; }
-; arr[i] = t;
-; }
-; }
-;
-
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.12.0"
-
-define void @f(i32* %end, i32* %arr, i32* %control, i32* %readarr) {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %tmp3 = load i32, i32* %end, align 4
- %cmp4 = icmp sgt i32 %tmp3, 0
- br i1 %cmp4, label %for.body.lr.ph, label %for.end
-
-for.body.lr.ph: ; preds = %entry.split
- br label %for.body
-
-for.body: ; preds = %for.body.lr.ph, %if.end
- %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
- %tmp1 = load i32, i32* %control, align 4
- %cmp1 = icmp sgt i32 %tmp1, 3
- br i1 %cmp1, label %if.then, label %if.end
-
-if.then: ; preds = %for.body
- %arrayidx = getelementptr inbounds i32, i32* %readarr, i32 %i.05
- %tmp2 = load i32, i32* %arrayidx, align 4
- br label %if.end
-
-if.end: ; preds = %if.then, %for.body
- %t.0 = phi i32 [ %tmp2, %if.then ], [ 0, %for.body ]
- %arrayidx2 = getelementptr inbounds i32, i32* %arr, i32 %i.05
- store i32 %t.0, i32* %arrayidx2, align 4
- %inc = add nuw nsw i32 %i.05, 1
- %tmp = load i32, i32* %end, align 4
- %cmp = icmp slt i32 %inc, %tmp
- br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge: ; preds = %if.end
- br label %for.end
-
-for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split
- ret void
-}
-
Copied: polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll (from r311248, polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll)
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll?p2=polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll&p1=polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll&r1=311248&r2=311259&rev=311259&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/invariant-load-hoisting-with-failing-scop.ll (original)
+++ polly/trunk/test/GPGPU/live-range-reordering-with-privatization.ll Sat Aug 19 13:21:22 2017
@@ -1,34 +1,15 @@
-; RUN: opt %loadPolly -analyze -polly-use-llvm-names -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
-
-; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
+ ; RUN: opt %loadPolly -polly-use-llvm-names -polly-scops \
+; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-code -disable-output \
+; RUN: < %s | FileCheck %s -check-prefix=CODE
+
+; RUN: opt %loadPolly -polly-use-llvm-names -polly-scops \
+; RUN: -polly-invariant-load-hoisting -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-kernel-ir -disable-output \
+; RUN: < %s | FileCheck %s -check-prefix=KERNELIR
; REQUIRES: pollyacc
-; SCOP: Function: f
-; SCOP-NEXT: Region: %entry.split---%for.end
-; SCOP-NEXT: Max Loop Depth: 1
-; SCOP-NEXT: Invariant Accesses: {
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp, tmp1] -> { Stmt_if_end[i0] -> MemRef_end[0] };
-; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : }
-; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; SCOP-NEXT: [tmp, tmp1] -> { Stmt_for_body[i0] -> MemRef_control[0] };
-; SCOP-NEXT: Execution Context: [tmp, tmp1] -> { : tmp > 0 }
-; SCOP-NEXT: }
-
-; Check that we generate a correct "always false" branch.
-; HOST-IR: br i1 false, label %polly.start, label %entry.split.pre_entry_bb
-
-; This test case checks that we generate correct code if PPCGCodeGeneration
-; decides a build is unsuccessful with invariant load hoisting enabled.
-;
-; There is a conditional branch which switches between the original code and
-; the new code. We try to set this conditional branch to branch on false.
-; However, invariant load hoisting changes the structure of the scop, so we
-; need to change the way we *locate* this instruction.
-;
; void f(const int *end, int *arr, const int *control, const int *readarr) {
; for (int i = 0; i < *end; i++) {
; int t = 0;
@@ -38,7 +19,20 @@
; arr[i] = t;
; }
; }
-;
+
+; This test case tests the ability to infer that `t` is local to each loop
+; iteration, and can therefore be privatized.
+
+; CODE: # kernel0
+; CODE-NEXT: for (int c0 = 0; c0 <= (tmp - 32 * b0 - 1) / 1048576; c0 += 1)
+; CODE-NEXT: if (tmp >= 32 * b0 + t0 + 1048576 * c0 + 1) {
+; CODE-NEXT: Stmt_for_body(32 * b0 + t0 + 1048576 * c0);
+; CODE-NEXT: if (tmp1 >= 4)
+; CODE-NEXT: Stmt_if_then(32 * b0 + t0 + 1048576 * c0);
+; CODE-NEXT: Stmt_if_end(32 * b0 + t0 + 1048576 * c0);
+; CODE-NEXT: }
+
+; KERNELIR: %private_array = alloca i32
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.12.0"
Modified: polly/trunk/test/GPGPU/non-read-only-scalars.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/non-read-only-scalars.ll?rev=311259&r1=311258&r2=311259&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/non-read-only-scalars.ll (original)
+++ polly/trunk/test/GPGPU/non-read-only-scalars.ll Sat Aug 19 13:21:22 2017
@@ -68,11 +68,16 @@
; CODE-NEXT: Stmt_bb17();
; CODE: # kernel2
-; CODE-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) {
-; CODE-NEXT: Stmt_bb18(c0);
-; CODE-NEXT: if (c0 <= 31)
-; CODE-NEXT: Stmt_bb20(c0);
-; CODE-NEXT: }
+; CODE_NEXT: {
+; CODE_NEXT: read();
+; CODE_NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) {
+; CODE_NEXT: Stmt_bb18(c0);
+; CODE_NEXT: if (c0 <= 31)
+; CODE_NEXT: Stmt_bb20(c0);
+; CODE_NEXT: }
+; CODE_NEXT: write();
+; CODE_NEXT: }
+
; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_sum_0__phi)
; KERNEL-IR: store float 0.000000e+00, float* %sum.0.phiops
Added: polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll?rev=311259&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll (added)
+++ polly/trunk/test/GPGPU/scalar-writes-in-scop-requires-abort.ll Sat Aug 19 13:21:22 2017
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-scops \
+; RUN: -polly-acc-dump-code -analyze \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=SCOP
+
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
+; RUN: -polly-acc-dump-code \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=CODE
+
+; RUN: opt %loadPolly -S -polly-use-llvm-names -polly-codegen-ppcg \
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s -check-prefix=HOST-IR
+
+; REQUIRES: pollyacc
+
+; SCOP: Invariant Accesses: {
+; SCOP-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; SCOP-NEXT: { Stmt_loop[i0] -> MemRef_p[0] };
+; SCOP-NEXT: Execution Context: { : }
+; SCOP-NEXT: }
+
+; CODE: # kernel0
+; CODE-NEXT: {
+; CODE-NEXT: if (32 * b0 + t0 <= 1025) {
+; CODE-NEXT: Stmt_loop(32 * b0 + t0);
+; CODE-NEXT: write(0);
+; CODE-NEXT: }
+; CODE-NEXT: sync0();
+; CODE-NEXT: }
+
+; Check that we generate a correct "always false" branch.
+; HOST-IR: br i1 false, label %polly.start, label %loop.pre_entry_bb
+
+; This test case checks that we generate correct code if PPCGCodeGeneration
+; decides a build is unsuccessful with invariant load hoisting enabled.
+;
+; There is a conditional branch which switches between the original code and
+; the new code. We try to set this conditional branch to branch on false.
+; However, invariant load hoisting changes the structure of the scop, so we
+; need to change the way we *locate* this instruction.
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.12.0"
+
+define void @foo(float* %A, float* %p) {
+entry:
+ br label %loop
+
+loop:
+ %indvar = phi i64 [0, %entry], [%indvar.next, %loop]
+ %indvar.next = add i64 %indvar, 1
+ %invariant = load float, float* %p
+ %ptr = getelementptr float, float* %A, i64 %indvar
+ store float 42.0, float* %ptr
+ %cmp = icmp sle i64 %indvar, 1024
+ br i1 %cmp, label %loop, label %loop2
+
+loop2:
+ %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2]
+ %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2]
+ %indvar2.next = add i64 %indvar2, 1
+ store float %indvar2f, float* %A
+ %cmp2 = icmp sle i64 %indvar2, 1024
+ br i1 %cmp2, label %loop2, label %end
+
+end:
+ ret void
+}
More information about the llvm-commits mailing list