[polly] r278212 - [GPGPU] Ensure arrays where only parts are modified are copied to GPU

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 10 03:58:19 PDT 2016


Author: grosser
Date: Wed Aug 10 05:58:19 2016
New Revision: 278212

URL: http://llvm.org/viewvc/llvm-project?rev=278212&view=rev
Log:
[GPGPU] Ensure arrays where only parts are modified are copied to GPU

To do so we change the way array exents are computed. Instead of the precise
set of memory locations accessed, we now compute the extent as the range between
minimal and maximal address in the first dimension and the full extent defined
by the sizes of the inner array dimensions.

We also move the computation of the may_persist region after the construction
of the arrays, as it relies on array information. Without arrays being
constructed no useful information is computed at all.

Added:
    polly/trunk/test/GPGPU/only-part-of-array-modified.ll
Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/host-statement.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=278212&r1=278211&r2=278212&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Wed Aug 10 05:58:19 2016
@@ -1700,21 +1700,78 @@ public:
 
   /// Derive the extent of an array.
   ///
-  /// The extent of an array is defined by the set of memory locations for
-  /// which a memory access in the iteration domain exists.
+  /// The extent of an array is the set of elements that are within the
+  /// accessed array. For the inner dimensions, the extent constraints are
+  /// 0 and the size of the corresponding array dimension. For the first
+  /// (outermost) dimension, the extent constraints are the minimal and maximal
+  /// subscript value for the first dimension.
   ///
   /// @param Array The array to derive the extent for.
   ///
   /// @returns An isl_set describing the extent of the array.
   __isl_give isl_set *getExtent(ScopArrayInfo *Array) {
+    unsigned NumDims = Array->getNumberOfDimensions();
     isl_union_map *Accesses = S->getAccesses();
     Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
+    Accesses = isl_union_map_detect_equalities(Accesses);
     isl_union_set *AccessUSet = isl_union_map_range(Accesses);
+    AccessUSet = isl_union_set_coalesce(AccessUSet);
+    AccessUSet = isl_union_set_detect_equalities(AccessUSet);
+    AccessUSet = isl_union_set_coalesce(AccessUSet);
+
+    if (isl_union_set_is_empty(AccessUSet)) {
+      isl_union_set_free(AccessUSet);
+      return isl_set_empty(Array->getSpace());
+    }
+
+    if (Array->getNumberOfDimensions() == 0) {
+      isl_union_set_free(AccessUSet);
+      return isl_set_universe(Array->getSpace());
+    }
+
     isl_set *AccessSet =
         isl_union_set_extract_set(AccessUSet, Array->getSpace());
+
     isl_union_set_free(AccessUSet);
+    isl_local_space *LS = isl_local_space_from_space(Array->getSpace());
+
+    isl_pw_aff *Val =
+        isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0));
 
-    return AccessSet;
+    isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);
+    isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);
+    OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,
+                                   isl_pw_aff_dim(Val, isl_dim_in));
+    OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,
+                                   isl_pw_aff_dim(Val, isl_dim_in));
+    OuterMin =
+        isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());
+    OuterMax =
+        isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());
+
+    isl_set *Extent = isl_set_universe(Array->getSpace());
+
+    Extent = isl_set_intersect(
+        Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));
+    Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));
+
+    for (unsigned i = 1; i < NumDims; ++i)
+      Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);
+
+    for (unsigned i = 1; i < NumDims; ++i) {
+      isl_pw_aff *PwAff =
+          const_cast<isl_pw_aff *>(Array->getDimensionSizePw(i));
+      isl_pw_aff *Val = isl_pw_aff_from_aff(isl_aff_var_on_domain(
+          isl_local_space_from_space(Array->getSpace()), isl_dim_set, i));
+      PwAff = isl_pw_aff_add_dims(PwAff, isl_dim_in,
+                                  isl_pw_aff_dim(Val, isl_dim_in));
+      PwAff = isl_pw_aff_set_tuple_id(PwAff, isl_dim_in,
+                                      isl_pw_aff_get_tuple_id(Val, isl_dim_in));
+      auto *Set = isl_pw_aff_gt_set(PwAff, Val);
+      Extent = isl_set_intersect(Set, Extent);
+    }
+
+    return Extent;
   }
 
   /// Derive the bounds of an array.
@@ -1827,7 +1884,6 @@ public:
         isl_union_map_copy(PPCGScop->tagged_must_kills);
     PPCGProg->to_inner = getArrayIdentity();
     PPCGProg->to_outer = getArrayIdentity();
-    PPCGProg->may_persist = compute_may_persist(PPCGProg);
     PPCGProg->any_to_outer = nullptr;
     PPCGProg->array_order = nullptr;
     PPCGProg->n_stmts = std::distance(S->begin(), S->end());
@@ -1838,6 +1894,8 @@ public:
 
     createArrays(PPCGProg);
 
+    PPCGProg->may_persist = compute_may_persist(PPCGProg);
+
     return PPCGProg;
   }
 

Modified: polly/trunk/test/GPGPU/host-statement.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-statement.ll?rev=278212&r1=278211&r2=278212&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/host-statement.ll (original)
+++ polly/trunk/test/GPGPU/host-statement.ll Wed Aug 10 05:58:19 2016
@@ -24,6 +24,7 @@ declare void @llvm.lifetime.start(i64, i
 ; CODE-NEXT: {
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_Q, MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
 ; CODE-NEXT:   {
 ; CODE-NEXT:     dim3 k0_dimBlock(32);
 ; CODE-NEXT:     dim3 k0_dimGrid(16);

Added: polly/trunk/test/GPGPU/only-part-of-array-modified.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/only-part-of-array-modified.ll?rev=278212&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/only-part-of-array-modified.ll (added)
+++ polly/trunk/test/GPGPU/only-part-of-array-modified.ll Wed Aug 10 05:58:19 2016
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=CODE %s
+;
+; REQUIRES: pollyacc
+;
+;    void foo(float A[], float B[]) {
+;      for (long i = 0; i < 1024; i++)
+;        A[2 * i] = B[i];
+;    }
+
+; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i32), cudaMemcpyHostToDevice));
+; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2047) * sizeof(i32), cudaMemcpyHostToDevice));
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(float* %A, float* %B) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb8, %bb
+  %i.0 = phi i64 [ 0, %bb ], [ %tmp9, %bb8 ]
+  %exitcond = icmp ne i64 %i.0, 1024
+  br i1 %exitcond, label %bb2, label %bb10
+
+bb2:                                              ; preds = %bb1
+  %tmp = getelementptr inbounds float, float* %B, i64 %i.0
+  %tmp3 = bitcast float* %tmp to i32*
+  %tmp4 = load i32, i32* %tmp3, align 4
+  %tmp5 = shl nsw i64 %i.0, 1
+  %tmp6 = getelementptr inbounds float, float* %A, i64 %tmp5
+  %tmp7 = bitcast float* %tmp6 to i32*
+  store i32 %tmp4, i32* %tmp7, align 4
+  br label %bb8
+
+bb8:                                              ; preds = %bb2
+  %tmp9 = add nuw nsw i64 %i.0, 1
+  br label %bb1
+
+bb10:                                             ; preds = %bb1
+  ret void
+}




More information about the llvm-commits mailing list