[polly] r275987 - GPGPU: Bail out of scops with hoisted invariant loads

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 08:56:25 PDT 2016


Author: grosser
Date: Tue Jul 19 10:56:25 2016
New Revision: 275987

URL: http://llvm.org/viewvc/llvm-project?rev=275987&view=rev
Log:
GPGPU: Bail out of scops with hoisted invariant loads

Hoisted invariant loads are currently not supported in GPGPU code generation and
support will only be added later. Also update the test cases to ensure no
invariant load hoisting is applied.

Modified:
    polly/trunk/include/polly/ScopInfo.h
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/host-statement.ll

Modified: polly/trunk/include/polly/ScopInfo.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=275987&r1=275986&r2=275987&view=diff
==============================================================================
--- polly/trunk/include/polly/ScopInfo.h (original)
+++ polly/trunk/include/polly/ScopInfo.h Tue Jul 19 10:56:25 2016
@@ -1939,6 +1939,9 @@ public:
     return InvariantEquivClasses;
   }
 
+  /// @brief Check if the scop has any invariant access.
+  bool hasInvariantAccesses() { return !InvariantEquivClasses.empty(); }
+
   /// @brief Mark the SCoP as optimized by the scheduler.
   void markAsOptimized() { IsOptimized = true; }
 

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275987&r1=275986&r2=275987&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Tue Jul 19 10:56:25 2016
@@ -1013,6 +1013,10 @@ public:
     DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout();
     RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
 
+    // We currently do not support scops with invariant loads.
+    if (S->hasInvariantAccesses())
+      return false;
+
     auto PPCGScop = createPPCGScop();
     auto PPCGProg = createPPCGProg(PPCGScop);
     auto PPCGGen = generateGPU(PPCGScop, PPCGProg);

Modified: polly/trunk/test/GPGPU/host-statement.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-statement.ll?rev=275987&r1=275986&r2=275987&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/host-statement.ll (original)
+++ polly/trunk/test/GPGPU/host-statement.ll Tue Jul 19 10:56:25 2016
@@ -1,8 +1,10 @@
 ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -polly-invariant-load-hoisting=false \
 ; RUN: -disable-output < %s | \
 ; RUN: FileCheck -check-prefix=CODE %s
 
 ; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -polly-invariant-load-hoisting=false \
 ; RUN: -disable-output < %s | \
 ; RUN: FileCheck -check-prefix=KERNEL-IR %s
 
@@ -16,15 +18,16 @@ declare void @llvm.lifetime.start(i64, i
 ; This test case tests that we can correctly handle a ScopStmt that is
 ; scheduled on the host, instead of within a kernel.
 
-; CODE: Code
+; CODE-LABEL: Code
 ; CODE-NEXT: ====
 ; CODE-NEXT: # host
 ; CODE-NEXT: {
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
 ; CODE-NEXT:   {
 ; CODE-NEXT:     dim3 k0_dimBlock(32);
 ; CODE-NEXT:     dim3 k0_dimGrid(16);
-; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_Q, p_0, p_1);
+; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
 ; CODE-NEXT:     cudaCheckKernel();
 ; CODE-NEXT:   }
 
@@ -39,14 +42,13 @@ declare void @llvm.lifetime.start(i64, i
 ; CODE:     {
 ; CODE-NEXT:       dim3 k2_dimBlock(16, 32);
 ; CODE-NEXT:       dim3 k2_dimGrid(16, p_1 <= -7650 ? 256 : -p_1 + floord(31 * p_1 + 30, 32) + 16);
-; CODE-NEXT:       kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q,  p_0, p_1);
+; CODE-NEXT:       kernel2 <<<k2_dimGrid, k2_dimBlock>>> (dev_MemRef_A, dev_MemRef_R, dev_MemRef_Q, p_0, p_1);
 ; CODE-NEXT:       cudaCheckKernel();
 ; CODE-NEXT:     }
 
 ; CODE:   }
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
-; CODE-NEXT:   if (p_0 <= 510 && p_1 <= 510)
-; CODE-NEXT:     cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
+; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(MemRef_R, dev_MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
 ; CODE-NEXT:   cudaCheckReturn(cudaMemcpy(MemRef_Q, dev_MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyDeviceToHost));
 ; CODE-NEXT:     Stmt_for_cond33_preheader();
 




More information about the llvm-commits mailing list