[polly] r275535 - GPGPU: Model array access information

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 15 00:05:54 PDT 2016


Author: grosser
Date: Fri Jul 15 02:05:54 2016
New Revision: 275535

URL: http://llvm.org/viewvc/llvm-project?rev=275535&view=rev
Log:
GPGPU: Model array access information

This allows us to derive host-device and device-host data-transfers.

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/lib/External/ppcg/gpu.c
    polly/trunk/lib/External/ppcg/gpu.h
    polly/trunk/test/GPGPU/double-parallel-loop.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Fri Jul 15 02:05:54 2016
@@ -265,6 +265,34 @@ public:
     return PPCGScop;
   }
 
+  /// Collect the array acesses in a statement.
+  ///
+  /// @param Stmt The statement for which to collect the accesses.
+  ///
+  /// @returns A list of array accesses.
+  gpu_stmt_access *getStmtAccesses(ScopStmt &Stmt) {
+    gpu_stmt_access *Accesses = nullptr;
+
+    for (MemoryAccess *Acc : Stmt) {
+      auto Access = isl_alloc_type(S->getIslCtx(), struct gpu_stmt_access);
+      Access->read = Acc->isRead();
+      Access->write = Acc->isWrite();
+      Access->access = Acc->getAccessRelation();
+      isl_space *Space = isl_map_get_space(Access->access);
+      Space = isl_space_range(Space);
+      Space = isl_space_from_range(Space);
+      isl_map *Universe = isl_map_universe(Space);
+      Access->tagged_access =
+          isl_map_domain_product(Acc->getAccessRelation(), Universe);
+      Access->exact_write = Acc->isWrite();
+      Access->ref_id = Acc->getId();
+      Access->next = Accesses;
+      Accesses = Access;
+    }
+
+    return Accesses;
+  }
+
   /// Collect the list of GPU statements.
   ///
   /// Each statement has an id, a pointer to the underlying data structure,
@@ -285,13 +313,121 @@ public:
 
       // We use the pet stmt pointer to keep track of the Polly statements.
       GPUStmt->stmt = (pet_stmt *)&Stmt;
-      GPUStmt->accesses = nullptr;
+      GPUStmt->accesses = getStmtAccesses(Stmt);
       i++;
     }
 
     return Stmts;
   }
 
+  /// Derive the extent of an array.
+  ///
+  /// The extent of an array is defined by the set of memory locations for
+  /// which a memory access in the iteration domain exists.
+  ///
+  /// @param Array The array to derive the extent for.
+  ///
+  /// @returns An isl_set describing the extent of the array.
+  __isl_give isl_set *getExtent(ScopArrayInfo *Array) {
+    isl_union_map *Accesses = S->getAccesses();
+    Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
+    isl_union_set *AccessUSet = isl_union_map_range(Accesses);
+    isl_set *AccessSet =
+        isl_union_set_extract_set(AccessUSet, Array->getSpace());
+    isl_union_set_free(AccessUSet);
+
+    return AccessSet;
+  }
+
+  /// Derive the bounds of an array.
+  ///
+  /// For the first dimension we derive the bound of the array from the extent
+  /// of this dimension. For inner dimensions we obtain their size directly from
+  /// ScopArrayInfo.
+  ///
+  /// @param PPCGArray The array to compute bounds for.
+  /// @param Array The polly array from which to take the information.
+  void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) {
+    if (PPCGArray.n_index > 0) {
+      isl_set *Dom = isl_set_copy(PPCGArray.extent);
+      Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1);
+      isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0);
+      isl_set_free(Dom);
+      Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound));
+      isl_local_space *LS = isl_local_space_from_space(isl_set_get_space(Dom));
+      isl_aff *One = isl_aff_zero_on_domain(LS);
+      One = isl_aff_add_constant_si(One, 1);
+      Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One));
+      Bound = isl_pw_aff_gist(Bound, S->getContext());
+      PPCGArray.bound[0] = Bound;
+    }
+
+    for (unsigned i = 1; i < PPCGArray.n_index; ++i) {
+      isl_pw_aff *Bound = Array->getDimensionSizePw(i);
+      auto LS = isl_pw_aff_get_domain_space(Bound);
+      auto Aff = isl_multi_aff_zero(LS);
+      Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff);
+      PPCGArray.bound[i] = Bound;
+    }
+  }
+
+  /// Create the arrays for @p PPCGProg.
+  ///
+  /// @param PPCGProg The program to compute the arrays for.
+  void createArrays(gpu_prog *PPCGProg) {
+    int i = 0;
+    for (auto &Element : S->arrays()) {
+      ScopArrayInfo *Array = Element.second.get();
+
+      std::string TypeName;
+      raw_string_ostream OS(TypeName);
+
+      OS << *Array->getElementType();
+      TypeName = OS.str();
+
+      gpu_array_info &PPCGArray = PPCGProg->array[i];
+
+      PPCGArray.space = Array->getSpace();
+      PPCGArray.type = strdup(TypeName.c_str());
+      PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8;
+      PPCGArray.name = strdup(Array->getName().c_str());
+      PPCGArray.extent = nullptr;
+      PPCGArray.n_index = Array->getNumberOfDimensions();
+      PPCGArray.bound =
+          isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index);
+      PPCGArray.extent = getExtent(Array);
+      PPCGArray.n_ref = 0;
+      PPCGArray.refs = nullptr;
+      PPCGArray.accessed = true;
+      PPCGArray.read_only_scalar = false;
+      PPCGArray.has_compound_element = false;
+      PPCGArray.local = false;
+      PPCGArray.declare_local = false;
+      PPCGArray.global = false;
+      PPCGArray.linearize = false;
+      PPCGArray.dep_order = nullptr;
+
+      setArrayBounds(PPCGArray, Array);
+    }
+  }
+
+  /// Create an identity map between the arrays in the scop.
+  ///
+  /// @returns An identity map between the arrays in the scop.
+  isl_union_map *getArrayIdentity() {
+    isl_union_map *Maps = isl_union_map_empty(S->getParamSpace());
+
+    for (auto &Item : S->arrays()) {
+      ScopArrayInfo *Array = Item.second.get();
+      isl_space *Space = Array->getSpace();
+      Space = isl_space_map_from_set(Space);
+      isl_map *Identity = isl_map_identity(Space);
+      Maps = isl_union_map_add_map(Maps, Identity);
+    }
+
+    return Maps;
+  }
+
   /// Create a default-initialized PPCG GPU program.
   ///
   /// @returns A new gpu grogram description.
@@ -305,19 +441,23 @@ public:
     PPCGProg->ctx = S->getIslCtx();
     PPCGProg->scop = PPCGScop;
     PPCGProg->context = isl_set_copy(PPCGScop->context);
-    PPCGProg->read = nullptr;
-    PPCGProg->may_write = nullptr;
-    PPCGProg->must_write = nullptr;
-    PPCGProg->tagged_must_kill = nullptr;
-    PPCGProg->may_persist = nullptr;
-    PPCGProg->to_outer = nullptr;
-    PPCGProg->to_inner = nullptr;
+    PPCGProg->read = isl_union_map_copy(PPCGScop->reads);
+    PPCGProg->may_write = isl_union_map_copy(PPCGScop->may_writes);
+    PPCGProg->must_write = isl_union_map_copy(PPCGScop->must_writes);
+    PPCGProg->tagged_must_kill =
+        isl_union_map_copy(PPCGScop->tagged_must_kills);
+    PPCGProg->to_inner = getArrayIdentity();
+    PPCGProg->to_outer = getArrayIdentity();
+    PPCGProg->may_persist = compute_may_persist(PPCGProg);
     PPCGProg->any_to_outer = nullptr;
     PPCGProg->array_order = nullptr;
     PPCGProg->n_stmts = std::distance(S->begin(), S->end());
     PPCGProg->stmts = getStatements();
-    PPCGProg->n_array = 0;
-    PPCGProg->array = nullptr;
+    PPCGProg->n_array = std::distance(S->array_begin(), S->array_end());
+    PPCGProg->array = isl_calloc_array(S->getIslCtx(), struct gpu_array_info,
+                                       PPCGProg->n_array);
+
+    createArrays(PPCGProg);
 
     return PPCGProg;
   }

Modified: polly/trunk/lib/External/ppcg/gpu.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.c?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.c (original)
+++ polly/trunk/lib/External/ppcg/gpu.c Fri Jul 15 02:05:54 2016
@@ -5309,7 +5309,7 @@ int generate_gpu(isl_ctx *ctx, const cha
  * arrays that are not local to "prog" and remove those elements that
  * are definitely killed or definitely written by "prog".
  */
-static __isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog)
+__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog)
 {
 	int i;
 	isl_union_set *may_persist, *killed;

Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Fri Jul 15 02:05:54 2016
@@ -369,4 +369,6 @@ __isl_give isl_schedule *map_to_device(s
                                        __isl_take isl_schedule *schedule);
 __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
                                        __isl_take isl_schedule *schedule);
+
+__isl_give isl_union_set *compute_may_persist(struct gpu_prog *prog);
 #endif

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=275535&r1=275534&r2=275535&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Fri Jul 15 02:05:54 2016
@@ -23,10 +23,15 @@
 ; SCHED-NEXT: child:
 ; SCHED-NEXT:   context: "{ [] }"
 ; SCHED-NEXT:   child:
-; SCHED-NEXT:     extension: "{  }"
+; SCHED-NEXT:     extension: "{ [] -> from_device_MemRef_A[]; [] -> to_device_MemRef_A[] }"
 ; SCHED-NEXT:     child:
 ; SCHED-NEXT:       sequence:
-; SCHED-NEXT:       - filter: "{  }"
+; SCHED-NEXT:       - filter: "{ to_device_MemRef_A[] }"
+; SCHED-NEXT:         child:
+; SCHED-NEXT:           set:
+; SCHED-NEXT:           - filter: "{ to_device_MemRef_A[] }"
+; SCHED-NEXT:             child:
+; SCHED-NEXT:               guard: "{ [] }"
 ; SCHED-NEXT:       - filter: "{ Stmt_bb5[i0, i1] }"
 ; SCHED-NEXT:         child:
 ; SCHED-NEXT:           guard: "{ [] }"
@@ -46,16 +51,26 @@
 ; SCHED-NEXT:                       schedule: "[{ Stmt_bb5[i0, i1] -> [(0)] }, { Stmt_bb5[i0, i1] -> [(floor((i1)/16) - 2*floor((i1)/32))] }]"
 ; SCHED-NEXT:                       permutable: 1
 ; SCHED-NEXT:                       coincident: [ 1, 1 ]
-; SCHED-NEXT:       - filter: "{  }"
+; SCHED-NEXT:       - filter: "{ from_device_MemRef_A[] }"
+; SCHED-NEXT:         child:
+; SCHED-NEXT:           set:
+; SCHED-NEXT:           - filter: "{ from_device_MemRef_A[] }"
+; SCHED-NEXT:             child:
+; SCHED-NEXT:               guard: "{ [] }"
 
 ; CODE: Code
 ; CODE-NEXT: ====
 ; CODE-NEXT: # host
 ; CODE-NEXT: {
-; CODE-NEXT:   dim3 k0_dimBlock(16, 32);
-; CODE-NEXT:   dim3 k0_dimGrid(32, 32);
-; CODE-NEXT:   kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
-; CODE-NEXT:   cudaCheckKernel();
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE-NEXT:   {
+; CODE-NEXT:     dim3 k0_dimBlock(16, 32);
+; CODE-NEXT:     dim3 k0_dimGrid(32, 32);
+; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
+; CODE-NEXT:     cudaCheckKernel();
+; CODE-NEXT:   }
+
+; CODE:   cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * (1024) * sizeof(float), cudaMemcpyDeviceToHost));
 ; CODE-NEXT: }
 
 ; CODE: # kernel0
@@ -63,6 +78,7 @@
 ; CODE-NEXT:   Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
 
 
+
 ;    void double_parallel_loop(float A[][1024]) {
 ;      for (long i = 0; i < 1024; i++)
 ;        for (long j = 0; j < 1024; j++)




More information about the llvm-commits mailing list