[polly] r305185 - [Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.

Siddharth Bhat via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 12 04:41:09 PDT 2017


Author: bollu
Date: Mon Jun 12 06:41:09 2017
New Revision: 305185

URL: http://llvm.org/viewvc/llvm-project?rev=305185&view=rev
Log:
[Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.

In `PPCGCodeGeneration`, we try to take the references of every `Value`
that is used within a Scop to offload to the kernel. This occurs in
`GPUNodeBuilder::createLaunchParameters`.

This breaks if one of the values is a function pointer, since one of
these cases will trigger:

1. We try to take the reference of an intrinsic function, and this
breaks at `verifyModule`, since it is illegal to take the reference of
an intrinsic.

2. We manage to take the reference to a function, but this fails at
`verifyModule` since the function will not be present in the module that
is created in the kernel.

3. Even if `verifyModule` succeeds (which should not occur), we would
then try to call a *host function* from the *device*, which is
illegal runtime behaviour.

So, we disable this entire range of possibilities by simply not allowing
function references within a `Scop` which corresponds to a kernel.

However, note that this is too conservative. We *can* allow intrinsics
within kernels if the backend can lower the intrinsic correctly. For
example, an intrinsic like `llvm.powi.*` can actually be lowered by the `NVPTX`
backend.

We will now gradually whitelist intrinsics which are known to be safe.

Differential Revision: https://reviews.llvm.org/D33414

Added:
    polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=305185&r1=305184&r2=305185&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Mon Jun 12 06:41:09 2017
@@ -2611,6 +2611,36 @@ public:
     return isl_ast_expr_ge(Iterations, MinComputeExpr);
   }
 
+  /// Check whether any instruction in @p BB has an operand whose type is a
+  /// pointer to a function.
+  bool ContainsFnPtrValInBlock(const BasicBlock *BB) {
+    for (const Instruction &I : *BB)
+      for (Value *Operand : I.operands()) {
+        // Non-pointer operands fail the cast and are skipped.
+        if (auto *PtrTy = dyn_cast<PointerType>(Operand->getType()))
+          if (isa<FunctionType>(PtrTy->getElementType()))
+            return true;
+      }
+    return false;
+  }
+
+  /// Return whether any block of any statement in @p S uses a function pointer.
+  bool ContainsFnPtr(const Scop &S) {
+    for (auto &Stmt : S) {
+      if (!Stmt.isBlockStmt()) {
+        assert(Stmt.isRegionStmt() &&
+               "Stmt was neither block nor region statement");
+        for (const BasicBlock *RegionBB : Stmt.getRegion()->blocks())
+          if (ContainsFnPtrValInBlock(RegionBB))
+            return true;
+        continue;
+      }
+      if (ContainsFnPtrValInBlock(Stmt.getBasicBlock()))
+        return true;
+    }
+    return false;
+  }
+
   /// Generate code for a given GPU AST described by @p Root.
   ///
   /// @param Root An isl_ast_node pointing to the root of the GPU AST.
@@ -2681,6 +2711,14 @@ public:
     if (S->hasInvariantAccesses())
       return false;
 
+    // We currently do not support functions inside kernels, as code
+    // generation will need to offload function calls to the kernel.
+    // This may lead to a kernel trying to call a function on the host.
+    // This also allows us to prevent codegen from trying to take the
+    // address of an intrinsic function to send to the kernel.
+    if (ContainsFnPtr(CurrentScop))
+      return false;
+
     auto PPCGScop = createPPCGScop();
     auto PPCGProg = createPPCGProg(PPCGScop);
     auto PPCGGen = generateGPU(PPCGScop, PPCGProg);

Added: polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll?rev=305185&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll (added)
+++ polly/trunk/test/GPGPU/unknown-fn-call-not-copied-into-kernel.ll Mon Jun 12 06:41:09 2017
@@ -0,0 +1,82 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s --check-prefix=SCOP
+; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s
+
+; Check that we do not create a kernel if there is an
+; unknown function call in a candidate kernel.
+
+; Check that we model the kernel as a scop.
+; SCOP:      Function: f
+; SCOP-NEXT:     Region: %entry.split---%for.end13
+
+; If a kernel were generated, then this code would have been part of the kernel
+; and not the `.ll` file that is generated.
+; CHECK:       %conv = fpext float %0 to double
+; CHECK-NEXT:  %1 = tail call double @extern.fn(double %conv)
+; CHECK-NEXT:  %conv6 = fptrunc double %1 to float
+
+; REQUIRES: pollyacc
+
+; static const int N = 1000;
+; void f(float A[N][N], int n, float B[N][N]) {
+;   for(int i = 0; i < n; i++) {
+;     for(int j = 0; j < n; j++) {
+;       B[i][j] = extern_fn(A[i][j]);
+;     }
+;
+;   }
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @f([1000 x float]* %A, i32 %n, [1000 x float]* %B) { ; IR for the C function f sketched in the comment above
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %cmp3 = icmp sgt i32 %n, 0 ; skip both loops when n <= 0
+  br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry.split
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.lr.ph, %for.inc11
+  %indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ] ; outer induction variable (i)
+  %cmp21 = icmp sgt i32 %n, 0
+  br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3.lr.ph, %for.body3
+  %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] ; inner induction variable (j)
+  %arrayidx5 = getelementptr inbounds [1000 x float], [1000 x float]* %A, i64 %indvars.iv5, i64 %indvars.iv
+  %0 = load float, float* %arrayidx5, align 4 ; reads A[i][j]
+  %conv = fpext float %0 to double
+  %1 = tail call double @extern.fn(double %conv) ; callee is only declared, not defined, in this module
+  %conv6 = fptrunc double %1 to float
+  %arrayidx10 = getelementptr inbounds [1000 x float], [1000 x float]* %B, i64 %indvars.iv5, i64 %indvars.iv
+  store float %conv6, float* %arrayidx10, align 4 ; writes B[i][j]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge
+
+for.cond1.for.inc11_crit_edge:                    ; preds = %for.body3
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader
+  %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
+  %wide.trip.count7 = zext i32 %n to i64
+  %exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7
+  br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge
+
+for.cond.for.end13_crit_edge:                     ; preds = %for.inc11
+  br label %for.end13
+
+for.end13:                                        ; preds = %for.cond.for.end13_crit_edge, %entry.split
+  ret void
+}
+
+declare double @extern.fn(double) #0
+attributes #0 = { readnone }




More information about the llvm-commits mailing list