[polly] r308625 - [PPCGCodeGen] [3/3] Update PPCGCodeGen + tests to latest ppcg.
Siddharth Bhat via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 20 08:48:36 PDT 2017
Author: bollu
Date: Thu Jul 20 08:48:36 2017
New Revision: 308625
URL: http://llvm.org/viewvc/llvm-project?rev=308625&view=rev
Log:
[PPCGCodeGen] [3/3] Update PPCGCodeGen + tests to latest ppcg.
This commit *WILL COMPILE*.
1. `PPCG` now uses `isl_multi_pw_aff` instead of an array of `pw_aff`.
This needs us to adjust how we index array bounds and how we construct
array bounds.
2. `PPCG` introduces two new kinds of nodes: `init_device` and `clear_device`.
We should investigate the correct way to handle these.
3. `PPCG` has gotten smarter with its use of live range reordering, so some of
the tests have a qualitative improvement.
4. `PPCG` changed its output style, so many test cases need to be updated to
fit the new style for `polly-acc-dump-code` checks.
Differential Revision: https://reviews.llvm.org/D35677
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
polly/trunk/test/GPGPU/host-control-flow.ll
polly/trunk/test/GPGPU/host-statement.ll
polly/trunk/test/GPGPU/invalid-kernel.ll
polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
polly/trunk/test/GPGPU/mostly-sequential.ll
polly/trunk/test/GPGPU/non-read-only-scalars.ll
polly/trunk/test/GPGPU/non-zero-array-offset.ll
polly/trunk/test/GPGPU/parametric-loop-bound.ll
polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll
polly/trunk/test/GPGPU/region-stmt.ll
polly/trunk/test/GPGPU/scheduler-timeout.ll
polly/trunk/test/GPGPU/size-cast.ll
polly/trunk/test/GPGPU/untouched-arrays.ll
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Thu Jul 20 08:48:36 2017
@@ -137,7 +137,11 @@ struct MustKillsInfo {
/// [params] -> { [Stmt_phantom[] -> ref_phantom[]] -> scalar_to_kill[] }
isl::union_map TaggedMustKills;
- MustKillsInfo() : KillsSchedule(nullptr), TaggedMustKills(nullptr){};
+ /// Tagged must kills stripped of the tags.
+ /// [params] -> { Stmt_phantom[] -> scalar_to_kill[] }
+ isl::union_map MustKills;
+
+ MustKillsInfo() : KillsSchedule(nullptr) {}
};
/// Check if SAI's uses are entirely contained within Scop S.
@@ -179,6 +183,7 @@ static MustKillsInfo computeMustKillsInf
}
Info.TaggedMustKills = isl::union_map::empty(isl::space(ParamSpace));
+ Info.MustKills = isl::union_map::empty(isl::space(ParamSpace));
// Initialising KillsSchedule to `isl_set_empty` creates an empty node in the
// schedule:
@@ -225,6 +230,9 @@ static MustKillsInfo computeMustKillsInf
isl::map TaggedMustKill = StmtToScalar.domain_product(PhantomRefToScalar);
Info.TaggedMustKills = Info.TaggedMustKills.unite(TaggedMustKill);
+ // 2. [param] -> { Stmt[] -> scalar_to_kill[] }
+ Info.MustKills = Info.TaggedMustKills.domain_factor_domain();
+
// 3. Create the kill schedule of the form:
// "[param] -> { Stmt_phantom[] }"
// Then add this to Info.KillsSchedule.
@@ -1004,11 +1012,11 @@ Value *GPUNodeBuilder::getArraySize(gpu_
Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
if (!gpu_array_is_scalar(Array)) {
- auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
+ auto OffsetDimZero = isl_multi_pw_aff_get_pw_aff(Array->bound, 0);
isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
for (unsigned int i = 1; i < Array->n_index; i++) {
- isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
+ isl_pw_aff *Bound_I = isl_multi_pw_aff_get_pw_aff(Array->bound, i);
isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
Res = isl_ast_expr_mul(Res, Expr);
}
@@ -1048,7 +1056,7 @@ Value *GPUNodeBuilder::getArrayOffset(gp
for (long i = 0; i < isl_set_dim(Min, isl_dim_set); i++) {
if (i > 0) {
- isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i - 1]);
+ isl_pw_aff *Bound_I = isl_multi_pw_aff_get_pw_aff(Array->bound, i - 1);
isl_ast_expr *BExpr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
Result = isl_ast_expr_mul(Result, BExpr);
}
@@ -1152,7 +1160,18 @@ void GPUNodeBuilder::createUser(__isl_ta
isl_ast_expr_free(Expr);
return;
}
-
+ if (!strcmp(Str, "init_device")) {
+ initializeAfterRTH();
+ isl_ast_node_free(UserStmt);
+ isl_ast_expr_free(Expr);
+ return;
+ }
+ if (!strcmp(Str, "clear_device")) {
+ finalize();
+ isl_ast_node_free(UserStmt);
+ isl_ast_expr_free(Expr);
+ return;
+ }
if (isPrefix(Str, "to_device")) {
if (!ManagedMemory)
createDataTransfer(UserStmt, HOST_TO_DEVICE);
@@ -1766,7 +1785,7 @@ GPUNodeBuilder::createKernelFunctionDecl
Sizes.push_back(nullptr);
for (long j = 1; j < Kernel->array[i].array->n_index; j++) {
isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff(
- Build, isl_pw_aff_copy(Kernel->array[i].array->bound[j]));
+ Build, isl_multi_pw_aff_get_pw_aff(Kernel->array[i].array->bound, j));
auto V = ExprBuilder.create(DimSize);
Sizes.push_back(SE.getSCEV(V));
}
@@ -2127,6 +2146,7 @@ public:
Options->debug = DebugOptions;
+ Options->group_chains = false;
Options->reschedule = true;
Options->scale_tile_loops = false;
Options->wrap = false;
@@ -2135,8 +2155,11 @@ public:
Options->ctx = nullptr;
Options->sizes = nullptr;
+ Options->tile = true;
Options->tile_size = 32;
+ Options->isolate_full_tiles = false;
+
Options->use_private_memory = PrivateMemory;
Options->use_shared_memory = SharedMemory;
Options->max_shared_memory = 48 * 1024;
@@ -2144,8 +2167,14 @@ public:
Options->target = PPCG_TARGET_CUDA;
Options->openmp = false;
Options->linearize_device_arrays = true;
- Options->live_range_reordering = false;
+ Options->allow_gnu_extensions = false;
+
+ Options->unroll_copy_shared = false;
+ Options->unroll_gpu_tile = false;
+ Options->live_range_reordering = true;
+ Options->live_range_reordering = true;
+ Options->hybrid = false;
Options->opencl_compiler_options = nullptr;
Options->opencl_use_gpu = false;
Options->opencl_n_include_file = 0;
@@ -2260,6 +2289,8 @@ public:
///
/// @returns A new ppcg scop.
ppcg_scop *createPPCGScop() {
+ MustKillsInfo KillsInfo = computeMustKillsInfo(*S);
+
auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop));
PPCGScop->options = createPPCGOptions();
@@ -2271,7 +2302,8 @@ public:
PPCGScop->context = S->getContext();
PPCGScop->domain = S->getDomains();
- PPCGScop->call = nullptr;
+ // TODO: investigate this further. PPCG calls collect_call_domains.
+ PPCGScop->call = isl_union_set_from_set(S->getContext());
PPCGScop->tagged_reads = getTaggedReads();
PPCGScop->reads = S->getReads();
PPCGScop->live_in = nullptr;
@@ -2280,6 +2312,9 @@ public:
PPCGScop->tagged_must_writes = getTaggedMustWrites();
PPCGScop->must_writes = S->getMustWrites();
PPCGScop->live_out = nullptr;
+ PPCGScop->tagged_must_kills = KillsInfo.TaggedMustKills.take();
+ PPCGScop->must_kills = KillsInfo.MustKills.take();
+
PPCGScop->tagger = nullptr;
PPCGScop->independence =
isl_union_map_empty(isl_set_get_space(PPCGScop->context));
@@ -2291,19 +2326,17 @@ public:
PPCGScop->tagged_dep_order = nullptr;
PPCGScop->schedule = S->getScheduleTree();
-
- MustKillsInfo KillsInfo = computeMustKillsInfo(*S);
// If we have something non-trivial to kill, add it to the schedule
if (KillsInfo.KillsSchedule.get())
PPCGScop->schedule = isl_schedule_sequence(
PPCGScop->schedule, KillsInfo.KillsSchedule.take());
- PPCGScop->tagged_must_kills = KillsInfo.TaggedMustKills.take();
PPCGScop->names = getNames();
PPCGScop->pet = nullptr;
compute_tagger(PPCGScop);
compute_dependences(PPCGScop);
+ eliminate_dead_code(PPCGScop);
return PPCGScop;
}
@@ -2458,14 +2491,23 @@ public:
/// @param PPCGArray The array to compute bounds for.
/// @param Array The polly array from which to take the information.
void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) {
+ isl_pw_aff_list *BoundsList =
+ isl_pw_aff_list_alloc(S->getIslCtx(), PPCGArray.n_index);
+ std::vector<isl::pw_aff> PwAffs;
+
+ isl_space *AlignSpace = S->getParamSpace();
+ AlignSpace = isl_space_add_dims(AlignSpace, isl_dim_set, 1);
+
if (PPCGArray.n_index > 0) {
if (isl_set_is_empty(PPCGArray.extent)) {
isl_set *Dom = isl_set_copy(PPCGArray.extent);
isl_local_space *LS = isl_local_space_from_space(
isl_space_params(isl_set_get_space(Dom)));
isl_set_free(Dom);
- isl_aff *Zero = isl_aff_zero_on_domain(LS);
- PPCGArray.bound[0] = isl_pw_aff_from_aff(Zero);
+ isl_pw_aff *Zero = isl_pw_aff_from_aff(isl_aff_zero_on_domain(LS));
+ Zero = isl_pw_aff_align_params(Zero, isl_space_copy(AlignSpace));
+ PwAffs.push_back(isl::manage(isl_pw_aff_copy(Zero)));
+ BoundsList = isl_pw_aff_list_insert(BoundsList, 0, Zero);
} else {
isl_set *Dom = isl_set_copy(PPCGArray.extent);
Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1);
@@ -2478,7 +2520,9 @@ public:
One = isl_aff_add_constant_si(One, 1);
Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One));
Bound = isl_pw_aff_gist(Bound, S->getContext());
- PPCGArray.bound[0] = Bound;
+ Bound = isl_pw_aff_align_params(Bound, isl_space_copy(AlignSpace));
+ PwAffs.push_back(isl::manage(isl_pw_aff_copy(Bound)));
+ BoundsList = isl_pw_aff_list_insert(BoundsList, 0, Bound);
}
}
@@ -2487,8 +2531,20 @@ public:
auto LS = isl_pw_aff_get_domain_space(Bound);
auto Aff = isl_multi_aff_zero(LS);
Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff);
- PPCGArray.bound[i] = Bound;
+ Bound = isl_pw_aff_align_params(Bound, isl_space_copy(AlignSpace));
+ PwAffs.push_back(isl::manage(isl_pw_aff_copy(Bound)));
+ BoundsList = isl_pw_aff_list_insert(BoundsList, i, Bound);
}
+
+ isl_space_free(AlignSpace);
+ isl_space *BoundsSpace = isl_set_get_space(PPCGArray.extent);
+
+ assert(BoundsSpace && "Unable to access space of array.");
+ assert(BoundsList && "Unable to access list of bounds.");
+
+ PPCGArray.bound =
+ isl_multi_pw_aff_from_pw_aff_list(BoundsSpace, BoundsList);
+ assert(PPCGArray.bound && "PPCGArray.bound was not constructed correctly.");
}
/// Create the arrays for @p PPCGProg.
@@ -2511,8 +2567,6 @@ public:
PPCGArray.name = strdup(Array->getName().c_str());
PPCGArray.extent = nullptr;
PPCGArray.n_index = Array->getNumberOfDimensions();
- PPCGArray.bound =
- isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index);
PPCGArray.extent = getExtent(Array);
PPCGArray.n_ref = 0;
PPCGArray.refs = nullptr;
@@ -2527,6 +2581,7 @@ public:
PPCGArray.dep_order = nullptr;
PPCGArray.user = Array;
+ PPCGArray.bound = nullptr;
setArrayBounds(PPCGArray, Array);
i++;
@@ -2570,6 +2625,7 @@ public:
isl_union_map_copy(PPCGScop->tagged_must_kills);
PPCGProg->to_inner = getArrayIdentity();
PPCGProg->to_outer = getArrayIdentity();
+ // TODO: verify that this assignment is correct.
PPCGProg->any_to_outer = nullptr;
// this needs to be set when live range reordering is enabled.
@@ -2962,15 +3018,16 @@ public:
Condition = isl_ast_expr_and(Condition, SufficientCompute);
isl_ast_build_free(Build);
+ // preload invariant loads. Note: This should happen before the RTC
+ // because the RTC may depend on values that are invariant load hoisted.
+ NodeBuilder.preloadInvariantLoads();
+
Value *RTC = NodeBuilder.createRTC(Condition);
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
Builder.SetInsertPoint(&*StartBlock->begin());
- NodeBuilder.initializeAfterRTH();
- NodeBuilder.preloadInvariantLoads();
NodeBuilder.create(Root);
- NodeBuilder.finalize();
/// In case a sequential kernel has more surrounding loops as any parallel
/// kernel, the SCoP is probably mostly sequential. Hence, there is no
Modified: polly/trunk/test/GPGPU/host-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-control-flow.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/host-control-flow.ll (original)
+++ polly/trunk/test/GPGPU/host-control-flow.ll Thu Jul 20 08:48:36 2017
@@ -14,9 +14,7 @@
; REQUIRES: pollyacc
-; CODE: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2) * (100) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2) * (100) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: for (int c0 = 0; c0 <= 99; c0 += 1)
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
@@ -26,6 +24,7 @@
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (2) * (100) * sizeof(float), cudaMemcpyDeviceToHost));
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
; CODE-NEXT: }
; IR-LABEL: polly.loop_header: ; preds = %polly.loop_header, %polly.loop_preheader
Modified: polly/trunk/test/GPGPU/host-statement.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-statement.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/host-statement.ll (original)
+++ polly/trunk/test/GPGPU/host-statement.ll Thu Jul 20 08:48:36 2017
@@ -18,11 +18,7 @@ declare void @llvm.lifetime.start(i64, i
; This test case tests that we can correctly handle a ScopStmt that is
; scheduled on the host, instead of within a kernel.
-; CODE-LABEL: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_Q, MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: {
Modified: polly/trunk/test/GPGPU/invalid-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/invalid-kernel.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/invalid-kernel.ll (original)
+++ polly/trunk/test/GPGPU/invalid-kernel.ll Thu Jul 20 08:48:36 2017
@@ -20,11 +20,7 @@
; were we still lack proper code-generation support. We check here that we
; detect the invalid IR and bail out gracefully.
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
@@ -34,7 +30,6 @@
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i64), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
; CODE: # kernel0
; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
Modified: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll (original)
+++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll Thu Jul 20 08:48:36 2017
@@ -21,7 +21,7 @@
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_A)
+; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -36,7 +36,7 @@
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
-; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_B)
+; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
@@ -47,18 +47,19 @@
; KERNEL-NEXT: }
-; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
+; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
; IR-NEXT: [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
; IR-NEXT: store i8* [[DEVPTR]], i8** %polly_launch_0_param_0
; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_0_param_0 to i8*
; IR-NEXT: store i8* [[DATA]], i8** [[SLOT]]
-; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
+; IR: [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
; IR-NEXT: [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_1_params, i64 0, i64 0
; IR-NEXT: store i8* [[DEVPTR]], i8** %polly_launch_1_param_0
; IR-NEXT: [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8*
; IR-NEXT: store i8* [[DATA]], i8** [[SLOT]]
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @kernel_params_only_some_arrays(float* %A, float* %B) {
Modified: polly/trunk/test/GPGPU/mostly-sequential.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/mostly-sequential.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/mostly-sequential.ll (original)
+++ polly/trunk/test/GPGPU/mostly-sequential.ll Thu Jul 20 08:48:36 2017
@@ -2,9 +2,6 @@
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s
-; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
-; RUN: FileCheck %s -check-prefix=IR
-
; REQUIRES: pollyacc
; void foo(float A[]) {
@@ -16,11 +13,7 @@
; A[42] += i + j;
; }
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(4);
@@ -28,26 +21,25 @@
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
-; CODE: for (int c0 = 0; c0 <= 127; c0 += 1)
-; CODE-NEXT: for (int c1 = 0; c1 <= 127; c1 += 1)
-; CODE-NEXT: {
+; CODE: {
; CODE-NEXT: dim3 k1_dimBlock;
; CODE-NEXT: dim3 k1_dimGrid;
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A, c0, c1);
+; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (128) * sizeof(float), cudaMemcpyDeviceToHost));
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
; CODE-NEXT: }
; CODE: # kernel0
; CODE-NEXT: Stmt_bb4(32 * b0 + t0);
; CODE: # kernel1
-; CODE-NEXT: Stmt_bb14(c0, c1);
+; CODE-NEXT: for (int c0 = 0; c0 <= 127; c0 += 1)
+; CODE-NEXT: for (int c1 = 0; c1 <= 127; c1 += 1)
+; CODE-NEXT: Stmt_bb14(c0, c1);
-; Verify that we identified this kernel as non-profitable.
-; IR: br i1 false, label %polly.start, label %bb3
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Modified: polly/trunk/test/GPGPU/non-read-only-scalars.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/non-read-only-scalars.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/non-read-only-scalars.ll (original)
+++ polly/trunk/test/GPGPU/non-read-only-scalars.ll Thu Jul 20 08:48:36 2017
@@ -31,12 +31,7 @@
; printf("%f\n", sum);
; }
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
+; CODE: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(1);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
@@ -49,25 +44,18 @@
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
-; CODE: for (int c0 = 0; c0 <= 32; c0 += 1) {
-; CODE-NEXT: {
+; CODE: {
; CODE-NEXT: dim3 k2_dimBlock;
; CODE-NEXT: dim3 k2_dimGrid;
-; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_sum_0__phi, dev_MemRef_sum_0, c0);
+; CODE-NEXT: kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_sum_0__phi, dev_MemRef_sum_0);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
-; CODE: if (c0 <= 31)
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k3_dimBlock;
-; CODE-NEXT: dim3 k3_dimGrid;
-; CODE-NEXT: kernel3 <<<k3_dimGrid, k3_dimBlock>>> (dev_MemRef_A, dev_MemRef_sum_0__phi, dev_MemRef_sum_0, c0);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
-
-; CODE: }
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (32) * sizeof(float), cudaMemcpyDeviceToHost));
+; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (32) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(&MemRef_sum_0, dev_MemRef_sum_0, sizeof(float), cudaMemcpyDeviceToHost));
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_sum_0__phi));
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_sum_0));
; CODE-NEXT: }
; CODE: # kernel0
@@ -80,19 +68,20 @@
; CODE-NEXT: Stmt_bb17();
; CODE: # kernel2
-; CODE-NEXT: Stmt_bb18(c0);
+; CODE-NEXT: for (int c0 = 0; c0 <= 32; c0 += 1) {
+; CODE-NEXT: Stmt_bb18(c0);
+; CODE-NEXT: if (c0 <= 31)
+; CODE-NEXT: Stmt_bb20(c0);
+; CODE-NEXT: }
+
+; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_1(i8 addrspace(1)* %MemRef_sum_0__phi)
+; KERNEL-IR: store float 0.000000e+00, float* %sum.0.phiops
+; KERNEL-IR: [[REGA:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0__phi to float*
+; KERNEL-IR: [[REGB:%.+]] = load float, float* %sum.0.phiops
+; KERNEL-IR: store float [[REGB]], float* [[REGA]]
-; CODE: # kernel3
-; CODE-NEXT: Stmt_bb20(c0);
+; KERNEL-IR: define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_2(i8 addrspace(1)* %MemRef_A, i8 addrspace(1)* %MemRef_sum_0__phi, i8 addrspace(1)* %MemRef_sum_0)
-; KERNEL-IR: store float %p_tmp23, float* %sum.0.phiops
-; KERNEL-IR-NEXT: [[REGA:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0__phi to float*
-; KERNEL-IR-NEXT: [[REGB:%.+]] = load float, float* %sum.0.phiops
-; KERNEL-IR-NEXT: store float [[REGB]], float* [[REGA]]
-; KERNEL-IR-NEXT: [[REGC:%.+]] = addrspacecast i8 addrspace(1)* %MemRef_sum_0 to float*
-; KERNEL-IR-NEXT: [[REGD:%.+]] = load float, float* %sum.0.s2a
-; KERNEL-IR-NEXT: store float [[REGD]], float* [[REGC]]
-; KERNEL-IR-NEXT: ret void
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Modified: polly/trunk/test/GPGPU/non-zero-array-offset.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/non-zero-array-offset.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/non-zero-array-offset.ll (original)
+++ polly/trunk/test/GPGPU/non-zero-array-offset.ll Thu Jul 20 08:48:36 2017
@@ -7,35 +7,30 @@
;
; REQUIRES: pollyacc
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (16) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (8) * sizeof(float), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(8);
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (16) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (8) * sizeof(float), cudaMemcpyHostToDevice));
+
+; CODE: dim3 k0_dimBlock(8);
; CODE-NEXT: dim3 k0_dimGrid(1);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
-; CODE: {
+; CODE: {
; CODE-NEXT: dim3 k1_dimBlock(8);
; CODE-NEXT: dim3 k1_dimGrid(1);
-; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A);
+; CODE-NEXT: kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_B);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_B, dev_MemRef_B, (16) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (8) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
; CODE: # kernel0
-; CODE-NEXT: Stmt_bb3(t0);
+; CODE-NEXT: Stmt_bb11(t0);
; CODE: # kernel1
-; CODE-NEXT: Stmt_bb11(t0);
+; CODE-NEXT: Stmt_bb3(t0);
; IR: %p_dev_array_MemRef_B = call i8* @polly_allocateMemoryForDevice(i64 32)
; IR-NEXT: %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 32)
Modified: polly/trunk/test/GPGPU/parametric-loop-bound.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/parametric-loop-bound.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/parametric-loop-bound.ll (original)
+++ polly/trunk/test/GPGPU/parametric-loop-bound.ll Thu Jul 20 08:48:36 2017
@@ -14,15 +14,16 @@
; }
; CODE: if (n >= 1) {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (n) * sizeof(i64), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (n) * sizeof(i64), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(n >= 1048546 ? 32768 : floord(n + 31, 32));
+; CODE-NEXT: dim3 k0_dimGrid(n >= 1048546 ? 32768 : (n + 31) / 32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, n);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
-; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (n) * sizeof(i64), cudaMemcpyDeviceToHost));
+; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (n) * sizeof(i64), cudaMemcpyDeviceToHost));
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
; CODE-NEXT: }
; CODE: # kernel0
Modified: polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll (original)
+++ polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll Thu Jul 20 08:48:36 2017
@@ -32,44 +32,28 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; CODE: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_out_l_055__phi, &MemRef_out_l_055__phi, sizeof(i32), cudaMemcpyHostToDevice));
-; CODE-NEXT: {
-; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(2);
-; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_out_l_055__phi, dev_MemRef_out_l_055, dev_MemRef_c);
-; CODE-NEXT: cudaCheckKernel();
-; CODE-NEXT: }
+; CODE: cudaCheckReturn(cudaMalloc((void **) &dev_MemRef_c, (50) * sizeof(i32)));
+
+; CODE: {
+; CODE-NEXT: dim3 k0_dimBlock(32);
+; CODE-NEXT: dim3 k0_dimGrid(2);
+; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_c);
+; CODE-NEXT: cudaCheckKernel();
+; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_c, dev_MemRef_c, (50) * sizeof(i32), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
+; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_c));
; CODE: # kernel0
-; CODE-NEXT: if (32 * b0 + t0 <= 48) {
-; CODE-NEXT: if (b0 == 1 && t0 == 16)
-; CODE-NEXT: Stmt_for_cond1_preheader(0);
-; CODE-NEXT: Stmt_for_body17(0, 32 * b0 + t0);
-; CODE-NEXT: if (b0 == 1 && t0 == 16)
-; CODE-NEXT: Stmt_for_cond15_for_cond12_loopexit_crit_edge(0);
-; CODE-NEXT: }
-
-; IR: [[REGA:%.+]] = bitcast i32* %out_l.055.phiops to i8*
-; IR-NEXT: call void @polly_copyFromHostToDevice(i8* [[REGA]], i8* %p_dev_array_MemRef_out_l_055__phi, i64 4)
+; CODE-NEXT: if (32 * b0 + t0 <= 48)
+; CODE-NEXT: Stmt_for_body17(0, 32 * b0 + t0);
-; IR: [[REGC:%.+]] = bitcast i32* %38 to i8*
+; IR: [[REGC:%.+]] = bitcast i32* %27 to i8*
; IR-NEXT: call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
-; KERNEL-IR: entry:
-; KERNEL-IR-NEXT: %out_l.055.s2a = alloca i32
-; KERNEL-IR-NEXT: %out_l.055.phiops = alloca i32
-; KERNEL-IR-NEXT: %1 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055__phi to i32*
-; KERNEL-IR-NEXT: %2 = load i32, i32* %1
-; KERNEL-IR-NEXT: store i32 %2, i32* %out_l.055.phiops
-; KERNEL-IR-NEXT: %3 = addrspacecast i8 addrspace(1)* %MemRef_out_l_055 to i32*
-; KERNEL-IR-NEXT: %4 = load i32, i32* %3
-; KERNEL-IR-NEXT: store i32 %4, i32* %out_l.055.s2a
-
+; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c, i32) #0 {
+; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %10
+; KERNEL-IR-NEXT: store i32 %0, i32 addrspace(1)* %polly.access.MemRef_c, align 4
define void @kernel_dynprog([50 x i32]* %c) {
entry:
Modified: polly/trunk/test/GPGPU/region-stmt.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/region-stmt.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/region-stmt.ll (original)
+++ polly/trunk/test/GPGPU/region-stmt.ll Thu Jul 20 08:48:36 2017
@@ -5,11 +5,7 @@
; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
; RUN: FileCheck %s -check-prefix=IR
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (128) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
@@ -19,7 +15,6 @@
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_B, dev_MemRef_B, (128) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
; CODE: # kernel0
; CODE-NEXT: Stmt_for_body__TO__if_end(32 * b0 + t0);
Modified: polly/trunk/test/GPGPU/scheduler-timeout.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/scheduler-timeout.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/scheduler-timeout.ll (original)
+++ polly/trunk/test/GPGPU/scheduler-timeout.ll Thu Jul 20 08:48:36 2017
@@ -27,11 +27,7 @@ target triple = "x86_64-unknown-linux-gn
; D[i][j] += tmp[i][k] * C[k][j];
; }
-; CODE:Code
-; CODE-NEXT:====
-; CODE-NEXT:# host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_D, MemRef_D, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_C, MemRef_C, (4096) * (4096) * sizeof(float), cudaMemcpyHostToDevice));
@@ -51,7 +47,6 @@ target triple = "x86_64-unknown-linux-gn
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_tmp, dev_MemRef_tmp, (4096) * (4096) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef_D, dev_MemRef_D, (4096) * (4096) * sizeof(float), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
; CODE: # kernel0
; CODE-NEXT: for (int c2 = 0; c2 <= 127; c2 += 1)
Modified: polly/trunk/test/GPGPU/size-cast.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/size-cast.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/size-cast.ll (original)
+++ polly/trunk/test/GPGPU/size-cast.ll Thu Jul 20 08:48:36 2017
@@ -9,20 +9,18 @@
; This test case ensures that we properly sign-extend the types we are using.
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: if (arg >= 1 && arg1 == 0) {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_arg3, MemRef_arg3, (arg) * sizeof(double), cudaMemcpyHostToDevice));
+; CODE: if (arg >= 1 && arg1 == 0) {
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_arg3, MemRef_arg3, (arg) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
-; CODE-NEXT: dim3 k0_dimGrid(arg >= 1048546 ? 32768 : floord(arg + 31, 32));
+; CODE-NEXT: dim3 k0_dimGrid(arg >= 1048546 ? 32768 : (arg + 31) / 32);
; CODE-NEXT: kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_arg3, dev_MemRef_arg2, arg, arg1);
; CODE-NEXT: cudaCheckKernel();
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_arg2, dev_MemRef_arg2, (arg) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT: }
+; CODE-NEXT cudaCheckReturn(cudaFree(dev_MemRef_arg3));
+; CODE-NEXT cudaCheckReturn(cudaFree(dev_MemRef_arg2));
; CODE: # kernel0
; CODE-NEXT: for (int c0 = 0; c0 <= (arg - 32 * b0 - 1) / 1048576; c0 += 1)
Modified: polly/trunk/test/GPGPU/untouched-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/untouched-arrays.ll?rev=308625&r1=308624&r2=308625&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/untouched-arrays.ll (original)
+++ polly/trunk/test/GPGPU/untouched-arrays.ll Thu Jul 20 08:48:36 2017
@@ -4,11 +4,7 @@
; REQUIRES: pollyacc
-; CODE: Code
-; CODE-NEXT: ====
-; CODE-NEXT: # host
-; CODE-NEXT: {
-; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_global_1, MemRef_global_1, (142) * sizeof(i32), cudaMemcpyHostToDevice));
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_global_1, MemRef_global_1, (142) * sizeof(i32), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(10);
; CODE-NEXT: dim3 k0_dimGrid(1);
@@ -17,6 +13,7 @@
; CODE-NEXT: }
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_global_1, dev_MemRef_global_1, (142) * sizeof(i32), cudaMemcpyDeviceToHost));
+; CODE: cudaCheckReturn(cudaFree(dev_MemRef_global_1));
; CODE-NEXT: }
; CODE: # kernel0
More information about the llvm-commits
mailing list