[polly] r275436 - GPGPU: Generate an AST for the GPU-mapped schedule
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 14 08:51:38 PDT 2016
Author: grosser
Date: Thu Jul 14 10:51:37 2016
New Revision: 275436
URL: http://llvm.org/viewvc/llvm-project?rev=275436&view=rev
Log:
GPGPU: Generate an AST for the GPU-mapped schedule
For this we need to provide an explicit list of statements as they occur in
the polly::Scop to ppcg.
We also set up basic AST printing facilities to facilitate debugging. To allow
code reuse, some (minor) changes in ppcg have been necessary.
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
polly/trunk/lib/External/ppcg/cuda.c
polly/trunk/lib/External/ppcg/cuda.h
polly/trunk/lib/External/ppcg/gpu.c
polly/trunk/lib/External/ppcg/gpu.h
polly/trunk/test/GPGPU/double-parallel-loop.ll
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275436&r1=275435&r2=275436&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Thu Jul 14 10:51:37 2016
@@ -26,8 +26,11 @@
#include "isl/union_map.h"
extern "C" {
+#include "cuda.h"
#include "gpu.h"
+#include "gpu_print.h"
#include "ppcg.h"
+#include "schedule.h"
}
#include "llvm/Support/Debug.h"
@@ -41,6 +44,12 @@ static cl::opt<bool> DumpSchedule("polly
cl::desc("Dump the computed GPU Schedule"),
cl::Hidden, cl::init(false), cl::ZeroOrMore,
cl::cat(PollyCategory));
+
+static cl::opt<bool>
+ DumpCode("polly-acc-dump-code",
+ cl::desc("Dump C code describing the GPU mapping"), cl::Hidden,
+ cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
/// Create the ast expressions for a ScopStmt.
///
/// This function is a callback to generate the ast expressions for each
@@ -256,6 +265,33 @@ public:
return PPCGScop;
}
+ /// Collect the list of GPU statements.
+ ///
+ /// Each statement has an id, a pointer to the underlying data structure,
+ /// as well as a list with all memory accesses.
+ ///
+ /// TODO: Initialize the list of memory accesses.
+ ///
+ /// @returns A linked-list of statements.
+ gpu_stmt *getStatements() {
+ gpu_stmt *Stmts = isl_calloc_array(S->getIslCtx(), struct gpu_stmt,
+ std::distance(S->begin(), S->end()));
+
+ int i = 0;
+ for (auto &Stmt : *S) {
+ gpu_stmt *GPUStmt = &Stmts[i];
+
+ GPUStmt->id = Stmt.getDomainId();
+
+ // We use the pet stmt pointer to keep track of the Polly statements.
+ GPUStmt->stmt = (pet_stmt *)&Stmt;
+ GPUStmt->accesses = nullptr;
+ i++;
+ }
+
+ return Stmts;
+ }
+
/// Create a default-initialized PPCG GPU program.
///
/// @returns A new gpu program description.
@@ -278,14 +314,90 @@ public:
PPCGProg->to_inner = nullptr;
PPCGProg->any_to_outer = nullptr;
PPCGProg->array_order = nullptr;
- PPCGProg->n_stmts = 0;
- PPCGProg->stmts = nullptr;
+ PPCGProg->n_stmts = std::distance(S->begin(), S->end());
+ PPCGProg->stmts = getStatements();
PPCGProg->n_array = 0;
PPCGProg->array = nullptr;
return PPCGProg;
}
+ struct PrintGPUUserData {
+ struct cuda_info *CudaInfo;
+ struct gpu_prog *PPCGProg;
+ std::vector<ppcg_kernel *> Kernels;
+ };
+
+ /// Print a user statement node in the host code.
+ ///
+ /// We use ppcg's printing facilities to print the actual statement and
+ /// additionally build up a list of all kernels that are encountered in the
+ /// host ast.
+ ///
+ /// @param P The printer to print to
+ /// @param Options The printing options to use
+ /// @param Node The node to print
+ /// @param User A user pointer to carry additional data. This pointer is
+ /// expected to be of type PrintGPUUserData.
+ ///
+ /// @returns A printer to which the output has been printed.
+ static __isl_give isl_printer *
+ printHostUser(__isl_take isl_printer *P,
+ __isl_take isl_ast_print_options *Options,
+ __isl_take isl_ast_node *Node, void *User) {
+ auto Data = (struct PrintGPUUserData *)User;
+ auto Id = isl_ast_node_get_annotation(Node);
+
+ if (Id) {
+ auto Kernel = (struct ppcg_kernel *)isl_id_get_user(Id);
+ isl_id_free(Id);
+ Data->Kernels.push_back(Kernel);
+ }
+
+ return print_host_user(P, Options, Node, User);
+ }
+
+ /// Print C code corresponding to the control flow in @p Kernel.
+ ///
+ /// @param Kernel The kernel to print
+ void printKernel(ppcg_kernel *Kernel) {
+ auto *P = isl_printer_to_str(S->getIslCtx());
+ P = isl_printer_set_output_format(P, ISL_FORMAT_C);
+ auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
+ P = isl_ast_node_print(Kernel->tree, P, Options);
+ char *String = isl_printer_get_str(P);
+ printf("%s\n", String);
+ free(String);
+ isl_printer_free(P);
+ }
+
+ /// Print C code corresponding to the GPU code described by @p Tree.
+ ///
+ /// @param Tree An AST describing GPU code
+ /// @param PPCGProg The PPCG program from which @p Tree has been constructed.
+ void printGPUTree(isl_ast_node *Tree, gpu_prog *PPCGProg) {
+ auto *P = isl_printer_to_str(S->getIslCtx());
+ P = isl_printer_set_output_format(P, ISL_FORMAT_C);
+
+ PrintGPUUserData Data;
+ Data.PPCGProg = PPCGProg;
+
+ auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
+ Options =
+ isl_ast_print_options_set_print_user(Options, printHostUser, &Data);
+ P = isl_ast_node_print(Tree, P, Options);
+ char *String = isl_printer_get_str(P);
+ printf("# host\n");
+ printf("%s\n", String);
+ free(String);
+ isl_printer_free(P);
+
+ for (auto Kernel : Data.Kernels) {
+ printf("# kernel%d\n", Kernel->id);
+ printKernel(Kernel);
+ }
+ }
+
// Generate a GPU program using PPCG.
//
// GPU mapping consists of multiple steps:
@@ -322,10 +434,12 @@ public:
int has_permutable = has_any_permutable_node(Schedule);
- if (!has_permutable || has_permutable < 0)
+ if (!has_permutable || has_permutable < 0) {
Schedule = isl_schedule_free(Schedule);
- else
+ } else {
Schedule = map_to_device(PPCGGen, Schedule);
+ PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
+ }
if (DumpSchedule) {
isl_printer *P = isl_printer_to_str(S->getIslCtx());
@@ -341,6 +455,15 @@ public:
isl_printer_free(P);
}
+ if (DumpCode) {
+ printf("Code\n");
+ printf("====\n");
+ if (PPCGGen->tree)
+ printGPUTree(PPCGGen->tree, PPCGProg);
+ else
+ printf("No code generated\n");
+ }
+
isl_schedule_free(Schedule);
return PPCGGen;
Modified: polly/trunk/lib/External/ppcg/cuda.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/cuda.c?rev=275436&r1=275435&r2=275436&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/cuda.c (original)
+++ polly/trunk/lib/External/ppcg/cuda.c Thu Jul 14 10:51:37 2016
@@ -153,20 +153,20 @@ static __isl_give isl_printer *copy_arra
return p;
}
-static void print_reverse_list(FILE *out, int len, int *list)
+static isl_printer *print_reverse_list(isl_printer *p, int len, int *list)
{
int i;
if (len == 0)
- return;
+ return p;
- fprintf(out, "(");
+ p = isl_printer_print_str(p, "(");
for (i = 0; i < len; ++i) {
if (i)
- fprintf(out, ", ");
- fprintf(out, "%d", list[len - 1 - i]);
+ p = isl_printer_print_str(p, ", ");
+ p = isl_printer_print_int(p, list[len - 1 - i]);
}
- fprintf(out, ")");
+ return isl_printer_print_str(p, ")");
}
/* Print the effective grid size as a list of the sizes in each
@@ -534,7 +534,7 @@ struct print_host_user_data {
* In case of a kernel launch, print a block of statements that
* defines the grid and the block and then launches the kernel.
*/
-static __isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
+__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
__isl_take isl_ast_print_options *print_options,
__isl_keep isl_ast_node *node, void *user)
{
@@ -569,8 +569,7 @@ static __isl_give isl_printer *print_hos
p = isl_printer_print_str(p, "dim3 k");
p = isl_printer_print_int(p, kernel->id);
p = isl_printer_print_str(p, "_dimBlock");
- print_reverse_list(isl_printer_get_file(p),
- kernel->n_block, kernel->block_dim);
+ p = print_reverse_list(p, kernel->n_block, kernel->block_dim);
p = isl_printer_print_str(p, ";");
p = isl_printer_end_line(p);
@@ -600,7 +599,9 @@ static __isl_give isl_printer *print_hos
p = isl_printer_start_line(p);
p = isl_printer_end_line(p);
+#if 0
print_kernel(data->prog, kernel, data->cuda);
+#endif
return p;
}
Modified: polly/trunk/lib/External/ppcg/cuda.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/cuda.h?rev=275436&r1=275435&r2=275436&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/cuda.h (original)
+++ polly/trunk/lib/External/ppcg/cuda.h Thu Jul 14 10:51:37 2016
@@ -6,5 +6,8 @@
int generate_cuda(isl_ctx *ctx, struct ppcg_options *options,
const char *input);
+__isl_give isl_printer *print_host_user(__isl_take isl_printer *p,
+ __isl_take isl_ast_print_options *print_options,
+ __isl_keep isl_ast_node *node, void *user);
#endif
Modified: polly/trunk/lib/External/ppcg/gpu.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.c?rev=275436&r1=275435&r2=275436&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.c (original)
+++ polly/trunk/lib/External/ppcg/gpu.c Thu Jul 14 10:51:37 2016
@@ -2297,7 +2297,7 @@ static isl_bool update_depth(__isl_keep
* The ASTs for the device code are embedded in ppcg_kernel objects
* attached to the leaf nodes that call "kernel".
*/
-static __isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
+__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
__isl_take isl_schedule *schedule)
{
struct ppcg_at_domain_data data;
Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=275436&r1=275435&r2=275436&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Thu Jul 14 10:51:37 2016
@@ -367,4 +367,6 @@ __isl_give isl_schedule *get_schedule(st
int has_any_permutable_node(__isl_keep isl_schedule *schedule);
__isl_give isl_schedule *map_to_device(struct gpu_gen *gen,
__isl_take isl_schedule *schedule);
+__isl_give isl_ast_node *generate_code(struct gpu_gen *gen,
+ __isl_take isl_schedule *schedule);
#endif
Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=275436&r1=275435&r2=275436&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Thu Jul 14 10:51:37 2016
@@ -3,6 +3,10 @@
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=SCHED %s
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=CODE %s
+
; REQUIRES: pollyacc
; CHECK: Stmt_bb5
@@ -44,6 +48,20 @@
; SCHED: coincident: [ 1, 1 ]
; SCHED: - filter: "{ }"
+; CODE: Code
+; CODE: ====
+; CODE: # host
+; CODE: {
+; CODE: dim3 k0_dimBlock(16, 32);
+; CODE: dim3 k0_dimGrid(32, 32);
+; CODE: kernel0 <<<k0_dimGrid, k0_dimBlock>>> ();
+; CODE: }
+
+; CODE: # kernel0
+; CODE: for (int c3 = 0; c3 <= 1; c3 += 1)
+; CODE: Stmt_bb5(32 * b0 + t0, 32 * b1 + t1 + 16 * c3);
+
+
; void double_parallel_loop(float A[][1024]) {
; for (long i = 0; i < 1024; i++)
; for (long j = 0; j < 1024; j++)
More information about the llvm-commits
mailing list