[polly] r311239 - [GPGPU] Collect parameter dimension used in MemoryAccesses

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 19 05:58:28 PDT 2017


Author: grosser
Date: Sat Aug 19 05:58:28 2017
New Revision: 311239

URL: http://llvm.org/viewvc/llvm-project?rev=311239&view=rev
Log:
[GPGPU] Collect parameter dimension used in MemoryAccesses

When using -polly-ignore-integer-wrapping and -polly-acc-codegen-managed-memory
we add parameter dimensions lazily to the domains, which results in PPCG not
including parameter dimensions that are only used in memory accesses in the
kernel space. To make sure these parameters are still passed to the kernel, we
collect these parameter dimensions and align the kernel's parameter space
before code-generating it.

Added:
    polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll
Modified:
    polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp

Modified: polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IslNodeBuilder.h?rev=311239&r1=311238&r2=311239&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IslNodeBuilder.h (original)
+++ polly/trunk/include/polly/CodeGen/IslNodeBuilder.h Sat Aug 19 05:58:28 2017
@@ -23,6 +23,9 @@
 #include "llvm/ADT/SmallVector.h"
 #include "isl/ctx.h"
 #include "isl/union_map.h"
+
+#include "isl-noexceptions.h"
+
 #include <utility>
 #include <vector>
 
@@ -41,6 +44,9 @@ struct SubtreeReferences {
   SetVector<Value *> &Values;
   SetVector<const SCEV *> &SCEVs;
   BlockGenerator &BlockGen;
+  // In case an (optional) parameter space location is provided, parameter space
+  // information is collected as well.
+  isl::space *ParamSpace;
 };
 
 /// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
@@ -50,6 +56,10 @@ struct SubtreeReferences {
 /// statements we force the generation of alloca memory locations and list
 /// these locations in the set of out-of-scop values as well.
 ///
+/// We also collect an isl::space that includes all parameter dimensions
+/// used in the statement's memory accesses, in case the ParamSpace pointer
+/// is non-null.
+///
 /// @param Stmt             The statement for which to extract the information.
 /// @param UserPtr          A void pointer that can be casted to a
 ///                         SubtreeReferences structure.

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=311239&r1=311238&r2=311239&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Sat Aug 19 05:58:28 2017
@@ -229,6 +229,12 @@ isl_stat addReferencesFromStmt(const Sco
   }
 
   for (auto &Access : *Stmt) {
+    if (References.ParamSpace) {
+      isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
+      (*References.ParamSpace) =
+          References.ParamSpace->align_params(ParamSpace);
+    }
+
     if (Access->isLatestArrayKind()) {
       auto *BasePtr = Access->getScopArrayInfo()->getBasePtr();
       if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
@@ -297,7 +303,7 @@ void IslNodeBuilder::getReferencesInSubt
 
   SetVector<const SCEV *> SCEVs;
   struct SubtreeReferences References = {
-      LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()};
+      LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
 
   for (const auto &I : IDToValue)
     Values.insert(I.second);

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=311239&r1=311238&r2=311239&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug 19 05:58:28 2017
@@ -436,7 +436,8 @@ private:
   ///            in the scop, nor do they immediately surroung the Scop.
   ///            See [Code generation of induction variables of loops outside
   ///            Scops]
-  std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
+  std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>,
+             isl::space>
   getReferencesInKernel(ppcg_kernel *Kernel);
 
   /// Compute the sizes of the execution grid for a given kernel.
@@ -1434,13 +1435,16 @@ getFunctionsFromRawSubtreeValues(SetVect
   return SubtreeFunctions;
 }
 
-std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
+std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>,
+           isl::space>
 GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) {
   SetVector<Value *> SubtreeValues;
   SetVector<const SCEV *> SCEVs;
   SetVector<const Loop *> Loops;
+  isl::space ParamSpace = isl::space(S.getIslCtx(), 0, 0).params();
   SubtreeReferences References = {
-      LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator()};
+      LI,         SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator(),
+      &ParamSpace};
 
   for (const auto &I : IDToValue)
     SubtreeValues.insert(I.second);
@@ -1507,7 +1511,8 @@ GPUNodeBuilder::getReferencesInKernel(pp
     else
       ReplacedValues.insert(It->second);
   }
-  return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops);
+  return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops,
+                         ParamSpace);
 }
 
 void GPUNodeBuilder::clearDominators(Function *F) {
@@ -1751,9 +1756,16 @@ void GPUNodeBuilder::createKernel(__isl_
   SetVector<Value *> SubtreeValues;
   SetVector<Function *> SubtreeFunctions;
   SetVector<const Loop *> Loops;
-  std::tie(SubtreeValues, SubtreeFunctions, Loops) =
+  isl::space ParamSpace;
+  std::tie(SubtreeValues, SubtreeFunctions, Loops, ParamSpace) =
       getReferencesInKernel(Kernel);
 
+  // Add parameters that appear only in the access function to the kernel
+  // space. This is important to make sure that all isl_ids are passed as
+  // parameters to the kernel, even though we may not have all parameters
+  // in the context to improve compile time.
+  Kernel->space = isl_space_align_params(Kernel->space, ParamSpace.release());
+
   assert(Kernel->tree && "Device AST of kernel node is empty");
 
   Instruction &HostInsertPoint = *Builder.GetInsertPoint();

Added: polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll?rev=311239&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll (added)
+++ polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll Sat Aug 19 05:58:28 2017
@@ -0,0 +1,44 @@
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -polly-invariant-load-hoisting -polly-ignore-aliasing \
+; RUN: -polly-process-unprofitable -polly-ignore-parameter-bounds \
+; RUN: -polly-acc-fail-on-verify-module-failure \
+; RUN: -polly-acc-codegen-managed-memory \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck %s
+
+; REQUIRES: pollyacc
+
+; Verify that we correctly generate a kernel even if certain invariant load
+; hoisted parameters appear only in memory accesses, but not domain elements.
+
+; CHECK: @FUNC_quux_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_tmp4, i32 %tmp3, i32 %tmp, i32 %tmp31, i32 %tmp2)
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.hoge = type { i8*, i64, i64, [1 x %struct.widget] }
+%struct.widget = type { i64, i64, i64 }
+
+@global = external unnamed_addr global %struct.hoge, align 32
+
+define void @quux(i32* noalias %arg, i32* noalias %arg1) {
+bb:
+  %tmp = load i32, i32* %arg, align 4
+  %tmp2 = sext i32 %tmp to i64
+  %tmp3 = load i32, i32* %arg1, align 4
+  %tmp4 = load [0 x double]*, [0 x double]** bitcast (%struct.hoge* @global to [0 x double]**), align 32
+  br label %bb5
+
+bb5:                                              ; preds = %bb5, %bb
+  %tmp6 = phi i32 [ %tmp11, %bb5 ], [ 0, %bb ]
+  %tmp7 = sext i32 %tmp6 to i64
+  %tmp8 = sub nsw i64 %tmp7, %tmp2
+  %tmp9 = getelementptr [0 x double], [0 x double]* %tmp4, i64 0, i64 %tmp8
+  store double undef, double* %tmp9, align 8
+  %tmp10 = icmp eq i32 %tmp6, %tmp3
+  %tmp11 = add i32 %tmp6, 1
+  br i1 %tmp10, label %bb12, label %bb5
+
+bb12:                                             ; preds = %bb5
+  ret void
+}




More information about the llvm-commits mailing list