[polly] r305185 - [Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.
Siddharth Bhat via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 12 04:41:09 PDT 2017
Author: bollu
Date: Mon Jun 12 06:41:09 2017
New Revision: 305185
URL: http://llvm.org/viewvc/llvm-project?rev=305185&view=rev
Log:
[Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.
In `PPCGCodeGeneration`, we try to take the references of every `Value`
that is used within a Scop to offload to the kernel. This occurs in
`GPUNodeBuilder::createLaunchParameters`.
This breaks if one of the values is a function pointer, since one of
these cases will trigger:
1. We try to take the reference of an intrinsic function, and this
breaks at `verifyModule`, since it is illegal to take the reference of
an intrinsic.
2. We manage to take the reference to a function, but this fails at
`verifyModule` since the function will not be present in the module that
is created in the kernel.
3. Even if `verifyModule` succeeds (which should not occur), we would
then try to call a *host function* from the *device*, which is
illegal runtime behaviour.
So, we disable this entire range of possibilities by simply not allowing
function references within a `Scop` which corresponds to a kernel.
However, note that this is too conservative. We *can* allow intrinsics
within kernels if the backend can lower the intrinsic correctly. For
example, an intrinsic like `llvm.powi.*` can actually be lowered by the `NVPTX`
backend.
We will now gradually whitelist intrinsics which are known to be safe.
Differential Revision: https://reviews.llvm.org/D33414
Added:
polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
Modified:
polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=305185&r1=305184&r2=305185&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Mon Jun 12 06:41:09 2017
@@ -2611,6 +2611,36 @@ public:
return isl_ast_expr_ge(Iterations, MinComputeExpr);
}
+ /// Determine whether any instruction of @p BB has an operand whose type is
+ /// a pointer to a function.
+ ///
+ /// @param BB The basic block whose instructions are inspected.
+ ///
+ /// @returns True if at least one operand has pointer-to-function type,
+ ///          false otherwise.
+ bool ContainsFnPtrValInBlock(const BasicBlock *BB) {
+   for (const Instruction &I : *BB) {
+     for (Value *Operand : I.operands()) {
+       // Only pointer-typed operands can refer to a function.
+       auto *PtrTy = dyn_cast<PointerType>(Operand->getType());
+       if (PtrTy && isa<FunctionType>(PtrTy->getElementType()))
+         return true;
+     }
+   }
+   return false;
+ }
+
+ /// Report whether any statement of the Scop @p S uses a function pointer.
+ ///
+ /// A block statement is checked through its basic block; a region statement
+ /// is checked through every basic block of its region.
+ ///
+ /// @param S The Scop whose statements are inspected.
+ ///
+ /// @returns True as soon as one basic block with a function-pointer operand
+ ///          is found, false if there is none.
+ bool ContainsFnPtr(const Scop &S) {
+   for (auto &Stmt : S) {
+     if (!Stmt.isBlockStmt()) {
+       assert(Stmt.isRegionStmt() &&
+              "Stmt was neither block nor region statement");
+       for (const BasicBlock *RegionBB : Stmt.getRegion()->blocks()) {
+         if (ContainsFnPtrValInBlock(RegionBB))
+           return true;
+       }
+       continue;
+     }
+
+     if (ContainsFnPtrValInBlock(Stmt.getBasicBlock()))
+       return true;
+   }
+   return false;
+ }
+
/// Generate code for a given GPU AST described by @p Root.
///
/// @param Root An isl_ast_node pointing to the root of the GPU AST.
@@ -2681,6 +2711,14 @@ public:
if (S->hasInvariantAccesses())
return false;
+ // We currently do not support functions inside kernels, as code
+ // generation will need to offload function calls to the kernel.
+ // This may lead to a kernel trying to call a function on the host.
+ // This also allows us to prevent codegen from trying to take the
+ // address of an intrinsic function to send to the kernel.
+ if (ContainsFnPtr(CurrentScop))
+ return false;
+
auto PPCGScop = createPPCGScop();
auto PPCGProg = createPPCGProg(PPCGScop);
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
Added: polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll?rev=305185&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll (added)
+++ polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll Mon Jun 12 06:41:09 2017
@@ -0,0 +1,82 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s --check-prefix=SCOP
+; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s
+
+; Check that we do not create a kernel if there is an
+; unknown function call in a candidate kernel.
+
+; Check that we model the kernel as a scop.
+; SCOP: Function: f
+; SCOP-NEXT: Region: %entry.split---%for.end13
+
+; If a kernel were generated, then this code would have been part of the kernel
+; and not the `.ll` file that is generated.
+; CHECK: %conv = fpext float %0 to double
+; CHECK-NEXT: %1 = tail call double @extern.fn(double %conv)
+; CHECK-NEXT: %conv6 = fptrunc double %1 to float
+
+; REQUIRES: pollyacc
+
+; static const int N = 1000;
+; void f(float A[N][N], int n, float B[N][N]) {
+; for(int i = 0; i < n; i++) {
+; for(int j = 0; j < n; j++) {
+; B[i][j] = extern_fn(A[i][j]);
+; }
+;
+; }
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; Test function: a two-level loop nest in which both induction variables run
+; from 0 up to %n. Each inner iteration reads one float from %A, passes it
+; (widened to double) to the external function @extern.fn, and stores the
+; narrowed result to the same position in %B.
+define void @f([1000 x float]* %A, i32 %n, [1000 x float]* %B) {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ ; Skip the whole loop nest unless %n > 0.
+ %cmp3 = icmp sgt i32 %n, 0
+ br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13
+
+for.cond1.preheader.lr.ph: ; preds = %entry.split
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc11
+ ; Outer loop header: %indvars.iv5 is the outer induction variable.
+ %indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ]
+ %cmp21 = icmp sgt i32 %n, 0
+ br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.lr.ph, %for.body3
+ ; Inner loop body: %indvars.iv is the inner induction variable. The call to
+ ; @extern.fn below is the function use this test is about.
+ %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx5 = getelementptr inbounds [1000 x float], [1000 x float]* %A, i64 %indvars.iv5, i64 %indvars.iv
+ %0 = load float, float* %arrayidx5, align 4
+ %conv = fpext float %0 to double
+ %1 = tail call double @extern.fn(double %conv)
+ %conv6 = fptrunc double %1 to float
+ %arrayidx10 = getelementptr inbounds [1000 x float], [1000 x float]* %B, i64 %indvars.iv5, i64 %indvars.iv
+ store float %conv6, float* %arrayidx10, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %wide.trip.count = zext i32 %n to i64
+ %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge
+
+for.cond1.for.inc11_crit_edge: ; preds = %for.body3
+ br label %for.inc11
+
+for.inc11: ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader
+ ; Outer loop latch: advance %indvars.iv5 and exit once it reaches %n.
+ %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+ %wide.trip.count7 = zext i32 %n to i64
+ %exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7
+ br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge
+
+for.cond.for.end13_crit_edge: ; preds = %for.inc11
+ br label %for.end13
+
+for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry.split
+ ret void
+}
+
+declare double @extern.fn(double) #0
+attributes #0 = { readnone }
More information about the llvm-commits
mailing list