[polly] r275390 - GPGPU: compute new schedule from polly scop

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 14 03:22:25 PDT 2016


Author: grosser
Date: Thu Jul 14 05:22:25 2016
New Revision: 275390

URL: http://llvm.org/viewvc/llvm-project?rev=275390&view=rev
Log:
GPGPU: compute new schedule from polly scop

To do so we copy the necessary information to compute an initial schedule from
polly::Scop to ppcg's scop. Most of the necessary information is directly
available and only needs to be passed on to ppcg, with the exception of 'tagged'
access relations, access relations that additionally carry information about
which memory access an access relation originates from.

We could possibly perform the construction of tagged accesses as part of
ScopInfo, but as this format is currently specific to ppcg we do not do this
yet, but keep this functionality local to our GPU code generation.

After the scop has been initialized, we compute data dependences and ask ppcg to
compute an initial schedule. Some of this functionality is already available in
polly::DependenceInfo and polly::ScheduleOptimizer, but to keep differences
to ppcg small we use ppcg's functionality here. We may later investigate if
a closer integration of these tools makes sense.

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/lib/External/ppcg/gpu.c
    polly/trunk/lib/External/ppcg/gpu.h
    polly/trunk/lib/External/ppcg/ppcg.c
    polly/trunk/lib/External/ppcg/ppcg.h
    polly/trunk/test/GPGPU/double-parallel-loop.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275390&r1=275389&r2=275390&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Thu Jul 14 05:22:25 2016
@@ -15,6 +15,7 @@
 #include "polly/CodeGen/IslNodeBuilder.h"
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
 #include "polly/ScopInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -22,6 +23,8 @@
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 
+#include "isl/union_map.h"
+
 extern "C" {
 #include "gpu.h"
 #include "ppcg.h"
@@ -34,6 +37,11 @@ using namespace llvm;
 
 #define DEBUG_TYPE "polly-codegen-ppcg"
 
+static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule",
+                                  cl::desc("Dump the computed GPU Schedule"),
+                                  cl::Hidden, cl::init(true), cl::ZeroOrMore,
+                                  cl::cat(PollyCategory));
+
 namespace {
 class PPCGCodeGeneration : public ScopPass {
 public:
@@ -89,10 +97,70 @@ public:
     return Options;
   }
 
+  /// Get a tagged access relation containing all accesses of type @p AccessTy.
+  ///
+  /// Instead of a normal access of the form:
+  ///
+  ///   Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)]
+  ///
+  /// a tagged access has the form
+  ///
+  ///   [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)]
+  ///
+  /// where 'id' is an additional space that references the memory access that
+  /// triggered the access.
+  ///
+  /// @param AccessTy The type of the memory accesses to collect.
+  ///
+  /// @return The relation describing all tagged memory accesses.
+  isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) {
+    isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace());
+
+    for (auto &Stmt : *S)
+      for (auto &Acc : Stmt)
+        if (Acc->getType() == AccessTy) {
+          isl_map *Relation = Acc->getAccessRelation();
+          Relation = isl_map_intersect_domain(Relation, Stmt.getDomain());
+
+          isl_space *Space = isl_map_get_space(Relation);
+          Space = isl_space_range(Space);
+          Space = isl_space_from_range(Space);
+          isl_map *Universe = isl_map_universe(Space);
+          Relation = isl_map_domain_product(Relation, Universe);
+          Accesses = isl_union_map_add_map(Accesses, Relation);
+        }
+
+    return Accesses;
+  }
+
+  /// Get the set of all read accesses, tagged with the access id.
+  ///
+  /// @see getTaggedAccesses
+  isl_union_map *getTaggedReads() {
+    return getTaggedAccesses(MemoryAccess::READ);
+  }
+
+  /// Get the set of all may (and must) accesses, tagged with the access id.
+  ///
+  /// @see getTaggedAccesses
+  isl_union_map *getTaggedMayWrites() {
+    return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE),
+                               getTaggedAccesses(MemoryAccess::MUST_WRITE));
+  }
+
+  /// Get the set of all must accesses, tagged with the access id.
+  ///
+  /// @see getTaggedAccesses
+  isl_union_map *getTaggedMustWrites() {
+    return getTaggedAccesses(MemoryAccess::MUST_WRITE);
+  }
+
   /// Create a new PPCG scop from the current scop.
   ///
-  /// For now the created scop is initialized to 'zero' and does not contain
-  /// any scop-specific information.
+  /// The PPCG scop is initialized with data from the current polly::Scop. From
+  /// this initial data, the data-dependences in the PPCG scop are initialized.
+  /// We do not use Polly's dependence analysis for now, to ensure we match
+  /// the PPCG default behaviour more closely.
   ///
   /// @returns A new ppcg scop.
   ppcg_scop *createPPCGScop() {
@@ -103,18 +171,18 @@ public:
     PPCGScop->start = 0;
     PPCGScop->end = 0;
 
-    PPCGScop->context = nullptr;
-    PPCGScop->domain = nullptr;
+    PPCGScop->context = S->getContext();
+    PPCGScop->domain = S->getDomains();
     PPCGScop->call = nullptr;
-    PPCGScop->tagged_reads = nullptr;
-    PPCGScop->reads = nullptr;
+    PPCGScop->tagged_reads = getTaggedReads();
+    PPCGScop->reads = S->getReads();
     PPCGScop->live_in = nullptr;
-    PPCGScop->tagged_may_writes = nullptr;
-    PPCGScop->may_writes = nullptr;
-    PPCGScop->tagged_must_writes = nullptr;
-    PPCGScop->must_writes = nullptr;
+    PPCGScop->tagged_may_writes = getTaggedMayWrites();
+    PPCGScop->may_writes = S->getWrites();
+    PPCGScop->tagged_must_writes = getTaggedMustWrites();
+    PPCGScop->must_writes = S->getMustWrites();
     PPCGScop->live_out = nullptr;
-    PPCGScop->tagged_must_kills = nullptr;
+    PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace());
     PPCGScop->tagger = nullptr;
 
     PPCGScop->independence = nullptr;
@@ -125,11 +193,14 @@ public:
     PPCGScop->dep_order = nullptr;
     PPCGScop->tagged_dep_order = nullptr;
 
-    PPCGScop->schedule = nullptr;
+    PPCGScop->schedule = S->getScheduleTree();
     PPCGScop->names = nullptr;
 
     PPCGScop->pet = nullptr;
 
+    compute_tagger(PPCGScop);
+    compute_dependences(PPCGScop);
+
     return PPCGScop;
   }
 
@@ -163,11 +234,75 @@ public:
     return PPCGProg;
   }
 
+  // Generate a GPU program using PPCG.
+  //
+  // GPU mapping consists of multiple steps:
+  //
+  //  1) Compute new schedule for the program.
+  //  2) Map schedule to GPU (TODO)
+  //  3) Generate code for new schedule (TODO)
+  //
+  // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer
+  // is mostly CPU specific. Instead, we use PPCG's GPU code generation
+  // strategy directly from this pass.
+  gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) {
+
+    auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen);
+
+    PPCGGen->ctx = S->getIslCtx();
+    PPCGGen->options = PPCGScop->options;
+    PPCGGen->print = nullptr;
+    PPCGGen->print_user = nullptr;
+    PPCGGen->prog = PPCGProg;
+    PPCGGen->tree = nullptr;
+    PPCGGen->types.n = 0;
+    PPCGGen->types.name = nullptr;
+    PPCGGen->sizes = nullptr;
+    PPCGGen->used_sizes = nullptr;
+    PPCGGen->kernel_id = 0;
+
+    // Set scheduling strategy to same strategy PPCG is using.
+    isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true);
+    isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true);
+
+    isl_schedule *Schedule = get_schedule(PPCGGen);
+
+    if (DumpSchedule) {
+      isl_printer *P = isl_printer_to_str(S->getIslCtx());
+      P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
+      P = isl_printer_print_str(P, "Schedule\n");
+      P = isl_printer_print_str(P, "========\n");
+      if (Schedule)
+        P = isl_printer_print_schedule(P, Schedule);
+      else
+        P = isl_printer_print_str(P, "No schedule found\n");
+
+      printf("%s\n", isl_printer_get_str(P));
+      isl_printer_free(P);
+    }
+
+    isl_schedule_free(Schedule);
+
+    return PPCGGen;
+  }
+
+  /// Free gpu_gen structure.
+  ///
+  /// @param PPCGGen The ppcg_gen object to free.
+  void freePPCGGen(gpu_gen *PPCGGen) {
+    isl_ast_node_free(PPCGGen->tree);
+    isl_union_map_free(PPCGGen->sizes);
+    isl_union_map_free(PPCGGen->used_sizes);
+    free(PPCGGen);
+  }
+
   bool runOnScop(Scop &CurrentScop) override {
     S = &CurrentScop;
 
     auto PPCGScop = createPPCGScop();
     auto PPCGProg = createPPCGProg(PPCGScop);
+    auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
+    freePPCGGen(PPCGGen);
     gpu_prog_free(PPCGProg);
     ppcg_scop_free(PPCGScop);
 

Modified: polly/trunk/lib/External/ppcg/gpu.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.c?rev=275390&r1=275389&r2=275390&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.c (original)
+++ polly/trunk/lib/External/ppcg/gpu.c Thu Jul 14 05:22:25 2016
@@ -4265,7 +4265,7 @@ static __isl_give isl_schedule *determin
  * a file, by computing one or by determining the properties
  * of the original schedule.
  */
-static __isl_give isl_schedule *get_schedule(struct gpu_gen *gen)
+__isl_give isl_schedule *get_schedule(struct gpu_gen *gen)
 {
 	isl_schedule *schedule;
 

Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=275390&r1=275389&r2=275390&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Thu Jul 14 05:22:25 2016
@@ -352,4 +352,5 @@ int generate_gpu(isl_ctx *ctx, const cha
 		struct gpu_prog *prog, __isl_keep isl_ast_node *tree,
 		struct gpu_types *types, void *user), void *user);
 
+__isl_give isl_schedule *get_schedule(struct gpu_gen *gen);
 #endif

Modified: polly/trunk/lib/External/ppcg/ppcg.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/ppcg.c?rev=275390&r1=275389&r2=275390&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/ppcg.c (original)
+++ polly/trunk/lib/External/ppcg/ppcg.c Thu Jul 14 05:22:25 2016
@@ -336,7 +336,7 @@ static __isl_give isl_union_map *project
  *
  *	{ [S[i,j] -> R_1[]] -> S[i,j]; [S[i,j] -> R_2[]] -> S[i,j] }
  */
-static void compute_tagger(struct ppcg_scop *ps)
+void compute_tagger(struct ppcg_scop *ps)
 {
 	isl_union_map *tagged;
 	isl_union_pw_multi_aff *tagger;
@@ -694,7 +694,7 @@ static void compute_flow_dep(struct ppcg
  * set of order dependences and a set of external false dependences
  * in compute_live_range_reordering_dependences.
  */
-static void compute_dependences(struct ppcg_scop *scop)
+void compute_dependences(struct ppcg_scop *scop)
 {
 	isl_union_map *may_source;
 	isl_union_access_info *access;

Modified: polly/trunk/lib/External/ppcg/ppcg.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/ppcg.h?rev=275390&r1=275389&r2=275390&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/ppcg.h (original)
+++ polly/trunk/lib/External/ppcg/ppcg.h Thu Jul 14 05:22:25 2016
@@ -114,6 +114,8 @@ int ppcg_transform(isl_ctx *ctx, const c
 	__isl_give isl_printer *(*fn)(__isl_take isl_printer *p,
 		struct ppcg_scop *scop, void *user), void *user);
 
+void compute_tagger(struct ppcg_scop *ps);
+void compute_dependences(struct ppcg_scop *scop);
 void *ppcg_scop_free(struct ppcg_scop *ps);
 
 #endif

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=275390&r1=275389&r2=275390&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Thu Jul 14 05:22:25 2016
@@ -1,5 +1,8 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-schedule \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=SCHED %s
+
 ; REQUIRES: pollyacc
 
 ; CHECK: Stmt_bb5
@@ -11,7 +14,13 @@
 ; CHECK:           { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
 ; CHECK:       MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
 ; CHECK:           { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
-;
+
+; SCHED: domain: "{ Stmt_bb5[i0, i1] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 }"
+; SCHED: child:
+; SCHED:   schedule: "[{ Stmt_bb5[i0, i1] -> [(i0)] }, { Stmt_bb5[i0, i1] -> [(i1)] }]"
+; SCHED:   permutable: 1
+; SCHED:   coincident: [ 1, 1 ]
+
 ;    void double_parallel_loop(float A[][1024]) {
 ;      for (long i = 0; i < 1024; i++)
 ;        for (long j = 0; j < 1024; j++)




More information about the llvm-commits mailing list