[polly] r311239 - [GPGPU] Collect parameter dimension used in MemoryAccesses
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 19 05:58:28 PDT 2017
Author: grosser
Date: Sat Aug 19 05:58:28 2017
New Revision: 311239
URL: http://llvm.org/viewvc/llvm-project?rev=311239&view=rev
Log:
[GPGPU] Collect parameter dimension used in MemoryAccesses
When using -polly-ignore-integer-wrapping and -polly-acc-codegen-managed-memory
we add parameter dimensions lazily to the domains, which results in PPCG not
including parameter dimensions that are only used in memory accesses in the
kernel space. To make sure these parameters are still passed to the kernel, we
collect these parameter dimensions and align the kernel's parameter space
before code-generating it.
Added:
polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll
Modified:
polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
Modified: polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IslNodeBuilder.h?rev=311239&r1=311238&r2=311239&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IslNodeBuilder.h (original)
+++ polly/trunk/include/polly/CodeGen/IslNodeBuilder.h Sat Aug 19 05:58:28 2017
@@ -23,6 +23,9 @@
#include "llvm/ADT/SmallVector.h"
#include "isl/ctx.h"
#include "isl/union_map.h"
+
+#include "isl-noexceptions.h"
+
#include <utility>
#include <vector>
@@ -41,6 +44,9 @@ struct SubtreeReferences {
SetVector<Value *> &Values;
SetVector<const SCEV *> &SCEVs;
BlockGenerator &BlockGen;
+ // In case an (optional) parameter space location is provided, parameter space
+ // information is collected as well.
+ isl::space *ParamSpace;
};
/// Extract the out-of-scop values and SCEVs referenced from a ScopStmt.
@@ -50,6 +56,10 @@ struct SubtreeReferences {
/// statements we force the generation of alloca memory locations and list
/// these locations in the set of out-of-scop values as well.
///
+/// We also collect an isl::space that includes all parameter dimensions
+/// used in the statement's memory accesses, in case the ParamSpace pointer
+/// is non-null.
+///
/// @param Stmt The statement for which to extract the information.
/// @param UserPtr A void pointer that can be cast to a
/// SubtreeReferences structure.
Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=311239&r1=311238&r2=311239&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Sat Aug 19 05:58:28 2017
@@ -229,6 +229,12 @@ isl_stat addReferencesFromStmt(const Sco
}
for (auto &Access : *Stmt) {
+ if (References.ParamSpace) {
+ isl::space ParamSpace = Access->getLatestAccessRelation().get_space();
+ (*References.ParamSpace) =
+ References.ParamSpace->align_params(ParamSpace);
+ }
+
if (Access->isLatestArrayKind()) {
auto *BasePtr = Access->getScopArrayInfo()->getBasePtr();
if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr))
@@ -297,7 +303,7 @@ void IslNodeBuilder::getReferencesInSubt
SetVector<const SCEV *> SCEVs;
struct SubtreeReferences References = {
- LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator()};
+ LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr};
for (const auto &I : IDToValue)
Values.insert(I.second);
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=311239&r1=311238&r2=311239&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug 19 05:58:28 2017
@@ -436,7 +436,8 @@ private:
/// in the scop, nor do they immediately surround the Scop.
/// See [Code generation of induction variables of loops outside
/// Scops]
- std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
+ std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>,
+ isl::space>
getReferencesInKernel(ppcg_kernel *Kernel);
/// Compute the sizes of the execution grid for a given kernel.
@@ -1434,13 +1435,16 @@ getFunctionsFromRawSubtreeValues(SetVect
return SubtreeFunctions;
}
-std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
+std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>,
+ isl::space>
GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) {
SetVector<Value *> SubtreeValues;
SetVector<const SCEV *> SCEVs;
SetVector<const Loop *> Loops;
+ isl::space ParamSpace = isl::space(S.getIslCtx(), 0, 0).params();
SubtreeReferences References = {
- LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator()};
+ LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator(),
+ &ParamSpace};
for (const auto &I : IDToValue)
SubtreeValues.insert(I.second);
@@ -1507,7 +1511,8 @@ GPUNodeBuilder::getReferencesInKernel(pp
else
ReplacedValues.insert(It->second);
}
- return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops);
+ return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops,
+ ParamSpace);
}
void GPUNodeBuilder::clearDominators(Function *F) {
@@ -1751,9 +1756,16 @@ void GPUNodeBuilder::createKernel(__isl_
SetVector<Value *> SubtreeValues;
SetVector<Function *> SubtreeFunctions;
SetVector<const Loop *> Loops;
- std::tie(SubtreeValues, SubtreeFunctions, Loops) =
+ isl::space ParamSpace;
+ std::tie(SubtreeValues, SubtreeFunctions, Loops, ParamSpace) =
getReferencesInKernel(Kernel);
+ // Add parameters that appear only in the access function to the kernel
+ // space. This is important to make sure that all isl_ids are passed as
+ // parameters to the kernel, even though we may not have all parameters
+ // in the context to improve compile time.
+ Kernel->space = isl_space_align_params(Kernel->space, ParamSpace.release());
+
assert(Kernel->tree && "Device AST of kernel node is empty");
Instruction &HostInsertPoint = *Builder.GetInsertPoint();
Added: polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll?rev=311239&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll (added)
+++ polly/trunk/test/GPGPU/memory-only-referenced-from-access.ll Sat Aug 19 05:58:28 2017
@@ -0,0 +1,44 @@
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -polly-invariant-load-hoisting -polly-ignore-aliasing \
+; RUN: -polly-process-unprofitable -polly-ignore-parameter-bounds \
+; RUN: -polly-acc-fail-on-verify-module-failure \
+; RUN: -polly-acc-codegen-managed-memory \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck %s
+
+; REQUIRES: pollyacc
+
+; Verify that we correctly generate a kernel even if certain invariant load
+; hoisted parameters appear only in memory accesses, but not domain elements.
+
+; CHECK: @FUNC_quux_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_tmp4, i32 %tmp3, i32 %tmp, i32 %tmp31, i32 %tmp2)
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.hoge = type { i8*, i64, i64, [1 x %struct.widget] }
+%struct.widget = type { i64, i64, i64 }
+
+@global = external unnamed_addr global %struct.hoge, align 32
+
+define void @quux(i32* noalias %arg, i32* noalias %arg1) {
+bb:
+ %tmp = load i32, i32* %arg, align 4
+ %tmp2 = sext i32 %tmp to i64
+ %tmp3 = load i32, i32* %arg1, align 4
+ %tmp4 = load [0 x double]*, [0 x double]** bitcast (%struct.hoge* @global to [0 x double]**), align 32
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb
+ %tmp6 = phi i32 [ %tmp11, %bb5 ], [ 0, %bb ]
+ %tmp7 = sext i32 %tmp6 to i64
+ %tmp8 = sub nsw i64 %tmp7, %tmp2
+ %tmp9 = getelementptr [0 x double], [0 x double]* %tmp4, i64 0, i64 %tmp8
+ store double undef, double* %tmp9, align 8
+ %tmp10 = icmp eq i32 %tmp6, %tmp3
+ %tmp11 = add i32 %tmp6, 1
+ br i1 %tmp10, label %bb12, label %bb5
+
+bb12: ; preds = %bb5
+ ret void
+}
More information about the llvm-commits
mailing list