[polly] r281193 - GPGPU: Bail out gracefully in case of invalid IR

Sun Sep 11 23:06:31 PDT 2016

Author: grosser
Date: Mon Sep 12 01:06:31 2016
New Revision: 281193

URL: http://llvm.org/viewvc/llvm-project?rev=281193&view=rev
Log:
GPGPU: Bail out gracefully in case of invalid IR

Instead of aborting, we now bail out gracefully in case the kernel IR we
generate is invalid. This can currently happen in case the SCoP stores
pointer values, which we model as arrays, as data values into other arrays. In
this case, the original pointer value is not available on the device and can
consequently not be stored. As detecting this ahead of time is not so easy, we
detect these situations after the invalid IR has been generated and bail out.

Added:
    polly/trunk/test/GPGPU/invalid-kernel.ll
Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=281193&r1=281192&r2=281193&view=diff
==============================================================================

--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Mon Sep 12 01:06:31 2016
@@ -152,6 +152,12 @@ public:
   /// Finalize the generated scop.
   virtual void finalize();
 
+  /// Track if the full build process was successful.
+  ///
+  /// This value is set to false, if throughout the build process an error
+  /// occurred which prevents us from generating valid GPU code.
+  bool BuildSuccessful = true;
+
 private:
   /// A vector of array base pointers for which a new ScopArrayInfo was created.
   ///
@@ -1409,10 +1415,10 @@ std::string GPUNodeBuilder::createKernel
 }
 
 std::string GPUNodeBuilder::finalizeKernelFunction() {
-  // Verify module.
-  llvm::legacy::PassManager Passes;
-  Passes.add(createVerifierPass());
-  Passes.run(*GPUModule);
+  if (verifyModule(*GPUModule)) {
+    BuildSuccessful = false;
+    return "";
+  }
 
   if (DumpKernelIR)
     outs() << *GPUModule << "\n";
@@ -2139,6 +2145,9 @@ public:
     NodeBuilder.initializeAfterRTH();
     NodeBuilder.create(Root);
     NodeBuilder.finalize();
+
+    if (!NodeBuilder.BuildSuccessful)
+      SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
   }
 
   bool runOnScop(Scop &CurrentScop) override {

Added: polly/trunk/test/GPGPU/invalid-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/invalid-kernel.ll?rev=281193&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/invalid-kernel.ll (added)
+++ polly/trunk/test/GPGPU/invalid-kernel.ll Mon Sep 12 01:06:31 2016
@@ -0,0 +1,78 @@
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck -check-prefix=CODE %s
+
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -disable-output < %s | \
+; RUN: not FileCheck %s -check-prefix=KERNEL-IR
+
+; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
+; RUN: FileCheck %s -check-prefix=IR
+
+; REQUIRES: pollyacc
+;
+;    void foo(long A[1024], long B[1024]) {
+;      for (long i = 0; i < 1024; i++)
+;        A[i] += (B[i] + (long)&B[i]);
+;    }
+
+; This kernel loads/stores a pointer address we model. This is a rare case,
+; were we still lack proper code-generation support. We check here that we
+; detect the invalid IR and bail out gracefully.
+
+; CODE: Code
+; CODE-NEXT: ====
+; CODE-NEXT: # host
+; CODE-NEXT: {
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
+; CODE-NEXT:   {
+; CODE-NEXT:     dim3 k0_dimBlock(32);
+; CODE-NEXT:     dim3 k0_dimGrid(32);
+; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B, dev_MemRef_A);
+; CODE-NEXT:     cudaCheckKernel();
+; CODE-NEXT:   }
+
+; CODE:   cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i64), cudaMemcpyDeviceToHost));
+; CODE-NEXT: }
+
+; CODE: # kernel0
+; CODE-NEXT: Stmt_bb2(32 * b0 + t0);
+
+; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
+; RUN: FileCheck %s -check-prefix=IR
+
+; KERNEL-IR: kernel
+
+; IR: br i1 false, label %polly.start, label %bb1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i64* %A, i64* %B) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb10, %bb
+  %i.0 = phi i64 [ 0, %bb ], [ %tmp11, %bb10 ]
+  %exitcond = icmp ne i64 %i.0, 1024
+  br i1 %exitcond, label %bb2, label %bb12
+
+bb2:                                              ; preds = %bb1
+  %tmp = getelementptr inbounds i64, i64* %B, i64 %i.0
+  %tmp3 = load i64, i64* %tmp, align 8
+  %tmp4 = getelementptr inbounds i64, i64* %B, i64 %i.0
+  %tmp5 = ptrtoint i64* %tmp4 to i64
+  %tmp6 = add nsw i64 %tmp3, %tmp5
+  %tmp7 = getelementptr inbounds i64, i64* %A, i64 %i.0
+  %tmp8 = load i64, i64* %tmp7, align 8
+  %tmp9 = add nsw i64 %tmp8, %tmp6
+  store i64 %tmp9, i64* %tmp7, align 8
+  br label %bb10
+
+bb10:                                             ; preds = %bb2
+  %tmp11 = add nuw nsw i64 %i.0, 1
+  br label %bb1
+
+bb12:                                             ; preds = %bb1
+  ret void
+}