[polly] r275535 - GPGPU: Model array access information
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 15 00:05:54 PDT 2016
Author: grosser
Date: Fri Jul 15 02:05:54 2016
New Revision: 275535
URL: http://llvm.org/viewvc/llvm-project?rev=275535&view=rev
Log:
GPGPU: Model array access information
This allows us to derive host-device and device-host data-transfers.
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
polly/trunk/lib/External/ppcg/gpu.c
polly/trunk/lib/External/ppcg/gpu.h
polly/trunk/test/GPGPU/double-parallel-loop.ll
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Fri Jul 15 02:05:54 2016
@@ -265,6 +265,34 @@ public:
return PPCGScop;
}
+ /// Collect the array acesses in a statement.
+ ///
+ /// @param Stmt The statement for which to collect the accesses.
+ ///
+ /// @returns A list of array accesses.
+ gpu_stmt_access *getStmtAccesses(ScopStmt &Stmt) {
+ gpu_stmt_access *Accesses = nullptr;
+
+ for (MemoryAccess *Acc : Stmt) {
+ auto Access = isl_alloc_type(S->getIslCtx(), struct gpu_stmt_access);
+ Access->read = Acc->isRead();
+ Access->write = Acc->isWrite();
+ Access->access = Acc->getAccessRelation();
+ isl_space *Space = isl_map_get_space(Access->access);
+ Space = isl_space_range(Space);
+ Space = isl_space_from_range(Space);
+ isl_map *Universe = isl_map_universe(Space);
+ Access->tagged_access =
+ isl_map_domain_product(Acc->getAccessRelation(), Universe);
+ Access->exact_write = Acc->isWrite();
+ Access->ref_id = Acc->getId();
+ Access->next = Accesses;
+ Accesses = Access;
+ }
+
+ return Accesses;
+ }
+
/// Collect the list of GPU statements.
///
/// Each statement has an id, a pointer to the underlying data structure,
@@ -285,13 +313,121 @@ public:
// We use the pet stmt pointer to keep track of the Polly statements.
GPUStmt->stmt = (pet_stmt *)&Stmt;
- GPUStmt->accesses = nullptr;
+ GPUStmt->accesses = getStmtAccesses(Stmt);
i++;
}
return Stmts;
}
+ /// Derive the extent of an array.
+ ///
+ /// The extent of an array is defined by the set of memory locations for
+ /// which a memory access in the iteration domain exists.
+ ///
+ /// @param Array The array to derive the extent for.
+ ///
+ /// @returns An isl_set describing the extent of the array.
+ __isl_give isl_set *getExtent(ScopArrayInfo *Array) {
+ isl_union_map *Accesses = S->getAccesses();
+ Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
+ isl_union_set *AccessUSet = isl_union_map_range(Accesses);
+ isl_set *AccessSet =
+ isl_union_set_extract_set(AccessUSet, Array->getSpace());
+ isl_union_set_free(AccessUSet);
+
+ return AccessSet;
+ }
+
+ /// Derive the bounds of an array.
+ ///
+ /// For the first dimension we derive the bound of the array from the extent
+ /// of this dimension. For inner dimensions we obtain their size directly from
+ /// ScopArrayInfo.
+ ///
+ /// @param PPCGArray The array to compute bounds for.
+ /// @param Array The polly array from which to take the information.
+ void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) {
+ if (PPCGArray.n_index > 0) {
+ isl_set *Dom = isl_set_copy(PPCGArray.extent);
+ Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1);
+ isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0);
+ isl_set_free(Dom);
+ Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound));
+ isl_local_space *LS = isl_local_space_from_space(isl_set_get_space(Dom));
+ isl_aff *One = isl_aff_zero_on_domain(LS);
+ One = isl_aff_add_constant_si(One, 1);
+ Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One));
+ Bound = isl_pw_aff_gist(Bound, S->getContext());
+ PPCGArray.bound[0] = Bound;
+ }
+
+ for (unsigned i = 1; i < PPCGArray.n_index; ++i) {
+ isl_pw_aff *Bound = Array->getDimensionSizePw(i);
+ auto LS = isl_pw_aff_get_domain_space(Bound);
+ auto Aff = isl_multi_aff_zero(LS);
+ Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff);
+ PPCGArray.bound[i] = Bound;
+ }
+ }
+
+ /// Create the arrays for @p PPCGProg.
+ ///
+ /// @param PPCGProg The program to compute the arrays for.
+ void createArrays(gpu_prog *PPCGProg) {
+ int i = 0;
+ for (auto &Element : S->arrays()) {
+ ScopArrayInfo *Array = Element.second.get();
+
+ std::string TypeName;
+ raw_string_ostream OS(TypeName);
+
+ OS << *Array->getElementType();
+ TypeName = OS.str();
+
+ gpu_array_info &PPCGArray = PPCGProg->array[i];
+
+ PPCGArray.space = Array->getSpace();
+ PPCGArray.type = strdup(TypeName.c_str());
+ PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8;
+ PPCGArray.name = strdup(Array->getName().c_str());
+ PPCGArray.extent = nullptr;
+ PPCGArray.n_index = Array->getNumberOfDimensions();
+ PPCGArray.bound =
+ isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index);
+ PPCGArray.extent = getExtent(Array);
+ PPCGArray.n_ref = 0;
+ PPCGArray.refs = nullptr;
+ PPCGArray.accessed = true;
+ PPCGArray.read_only_scalar = false;
+ PPCGArray.has_compound_element = false;
+ PPCGArray.local = false;
+ PPCGArray.declare_local = false;
+ PPCGArray.global = false;
+ PPCGArray.linearize = false;
+ PPCGArray.dep_order = nullptr;
+
+ setArrayBounds(PPCGArray, Array);
+ }
+ }
+
+ /// Create an identity map between the arrays in the scop.
+ ///
+ /// @returns An identity map between the arrays in the scop.
+ isl_union_map *getArrayIdentity() {
+ isl_union_map *Maps = isl_union_map_empty(S->getParamSpace());
+
+ for (auto &Item : S->arrays()) {
+ ScopArrayInfo *Array = Item.second.get();
+ isl_space *Space = Array->getSpace();
+ Space = isl_space_map_from_set(Space);
+ isl_map *Identity = isl_map_identity(Space);
+ Maps = isl_union_map_add_map(Maps, Identity);
+ }
+
+ return Maps;
+ }
+
/// Create a default-initialized PPCG GPU program.
///
/// @returns A new gpu grogram description.
@@ -305,19 +441,23 @@ public:
PPCGProg->ctx = S->getIslCtx();
PPCGProg->scop = PPCGScop;
PPCGProg->context = isl_set_copy(PPCGScop->context);
- PPCGProg->read = nullptr;
- PPCGProg->may_write = nullptr;
- PPCGProg->must_write = nullptr;
- PPCGProg->tagged_must_kill = nullptr;
- PPCGProg->may_persist = nullptr;
- PPCGProg->to_outer = nullptr;
- PPCGProg->to_inner = nullptr;
+ PPCGProg->read = isl_union_map_copy(PPCGScop->reads);
+ PPCGProg->may_write = isl_union_map_copy(PPCGScop->may_writes);
+ PPCGProg->must_write = isl_union_map_copy(PPCGScop->must_writes);
+ PPCGProg->tagged_must_kill =
+ isl_union_map_copy(PPCGScop->tagged_must_kills);
+ PPCGProg->to_inner = getArrayIdentity();
+ PPCGProg->to_outer = getArrayIdentity();
+ PPCGProg->may_persist = compute_may_persist(PPCGProg);
PPCGProg->any_to_outer = nullptr;
PPCGProg->array_order = nullptr;
PPCGProg->n_stmts = std::distance(S->begin(), S->end());
PPCGProg->stmts = getStatements();
- PPCGProg->n_array = 0;
- PPCGProg->array = nullptr;
+ PPCGProg->n_array = std::distance(S->array_begin(), S->array_end());
+ PPCGProg->array = isl_calloc_array(S->getIslCtx(), struct gpu_array_info,
+ PPCGProg->n_array);
+
+ createArrays(PPCGProg);
return PPCGProg;
}
Modified: polly/trunk/lib/External/ppcg/gpu.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.c?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.c (original)
+++ polly/trunk/lib/External/ppcg/gpu.c Fri Jul 15 02:05:54 2016
@@ -5309,7 +5309,7 @@ int generate_gpu(isl_ctx *ctx, const cha
* arrays that are not local to "prog" and remove those elements that
* are definitely killed or definitely written by "prog".
*/
-static __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog)
+__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog)
{
int i;
isl_union_set *may_persist, *killed;
Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Fri Jul 15 02:05:54 2016
@@ -369,4 +369,6 @@ __isl_give isl_schedule *map_to_device(s
__isl_take isl_schedule *schedule);
__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
__isl_take isl_schedule *schedule);
+
+__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
#endif
Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Fri Jul 15 02:05:54 2016
@@ -23,10 +23,15 @@
; SCHED-NEXT: child:
; SCHED-NEXT: context: "{ [] }"
; SCHED-NEXT: child:
-; SCHED-NEXT: extension: "{ }"
+; SCHED-NEXT: extension: "{ [] -> from_device_MemRef_A[]; [] -> to_device_MemRef_A[] }"
; SCHED-NEXT: child:
; SCHED-NEXT: sequence:
-; SCHED-NEXT: - filter: "{ }"
+; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: set:
+; SCHED-NEXT: - filter: "{ to_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: guard: "{ [] }"
; SCHED-NEXT: - filter: "{ Stmt_bb5[i0, i1] }"
; SCHED-NEXT: child:
; SCHED-NEXT: guard: "{ [] }"
@@ -46,16 +51,26 @@
; SCHED-NEXT: schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/16) - 2*floor((i1)/32))] }]"
; SCHED-NEXT: permutable: 1
; SCHED-NEXT: coincident: [ 1, 1 ]
-; SCHED-NEXT: - filter: "{ }"
+; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: set:
+; SCHED-NEXT: - filter: "{ from_device_MemRef_A[] }"
+; SCHED-NEXT: child:
+; SCHED-NEXT: guard: "{ [] }"
; CODE: Code
; CODE-NEXT: ====
; CODE-NEXT: # host
; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(16, 32);
-; CODE-NEXT: dim3 k0_dimGrid(32, 32);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
-; CODE-NEXT: cudaCheckKernel();
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE-NEXT: {
+; CODE-NEXT: dim3 k0_dimBlock(16, 32);
+; CODE-NEXT: dim3 k0_dimGrid(32, 32);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
+; CODE-NEXT: cudaCheckKernel();
+; CODE-NEXT: }
+
+; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: }
; CODE: # kernel0
@@ -63,6 +78,7 @@
; CODE-NEXT: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
+
; void double_parallel_loop(float A[][1024]) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)
More information about the llvm-commits
mailing list