[polly] r310193 - [Polly] [PPCGCodeGeneration] Deal with loops outside the Scop correctly in PPCGCodeGeneration.

Siddharth Bhat via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 5 19:39:05 PDT 2017


Author: bollu
Date: Sat Aug  5 19:39:05 2017
New Revision: 310193

URL: http://llvm.org/viewvc/llvm-project?rev=310193&view=rev
Log:
[Polly] [PPCGCodeGeneration] Deal with loops outside the Scop correctly in PPCGCodeGeneration.

PPCGCodeGeneration currently does not handle a Scop that has a loop outside
of it. In the test case, only the inner loop is detected as part of the Scop,
while the outer loop remains outside of it. This currently breaks codegen.

The fix is to reuse the existing mechanism in `IslNodeBuilder` within
`GPUNodeBuilder`.

Differential Revision: https://reviews.llvm.org/D36290

Added:
    polly/trunk/test/GPGPU/loops-outside-scop.ll
Modified:
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=310193&r1=310192&r2=310193&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Sat Aug  5 19:39:05 2017
@@ -296,6 +296,7 @@ void IslNodeBuilder::getReferencesInSubt
   for (const auto &I : IDToValue)
     Values.insert(I.second);
 
+  // NOTE: this is populated in IslNodeBuilder::addParameters
   for (const auto &I : OutsideLoopIterations)
     Values.insert(cast<SCEVUnknown>(I.second)->getValue());
 

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=310193&r1=310192&r2=310193&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug  5 19:39:05 2017
@@ -420,11 +420,18 @@ private:
   ///
   /// @param Kernel The kernel to scan for llvm::Values
   ///
-  /// @returns A pair, whose first element contains the set of values
-  ///          referenced by the kernel, and whose second element contains the
-  ///          set of functions referenced by the kernel. All functions in the
-  ///          second set satisfy isValidFunctionInKernel.
-  std::pair<SetVector<Value *>, SetVector<Function *>>
+  /// @returns A tuple, whose:
+  ///          - First element contains the set of values referenced by the
+  ///            kernel
+  ///          - Second element contains the set of functions referenced by the
+  ///             kernel. All functions in the set satisfy
+  ///             `isValidFunctionInKernel`.
+  ///          - Third element contains loops that have induction variables
+  ///            which are used in the kernel, *and* these loops are *neither*
+  ///            in the scop, nor do they immediately surround the Scop.
+  ///            See [Code generation of induction variables of loops outside
+  ///            Scops]
+  std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
   getReferencesInKernel(ppcg_kernel *Kernel);
 
   /// Compute the sizes of the execution grid for a given kernel.
@@ -1396,7 +1403,7 @@ getFunctionsFromRawSubtreeValues(SetVect
   return SubtreeFunctions;
 }
 
-std::pair<SetVector<Value *>, SetVector<Function *>>
+std::tuple<SetVector<Value *>, SetVector<Function *>, SetVector<const Loop *>>
 GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) {
   SetVector<Value *> SubtreeValues;
   SetVector<const SCEV *> SCEVs;
@@ -1407,11 +1414,22 @@ GPUNodeBuilder::getReferencesInKernel(pp
   for (const auto &I : IDToValue)
     SubtreeValues.insert(I.second);
 
+  // NOTE: this is populated in IslNodeBuilder::addParameters
+  // See [Code generation of induction variables of loops outside Scops].
+  for (const auto &I : OutsideLoopIterations)
+    SubtreeValues.insert(cast<SCEVUnknown>(I.second)->getValue());
+
   isl_ast_node_foreach_descendant_top_down(
       Kernel->tree, collectReferencesInGPUStmt, &References);
 
-  for (const SCEV *Expr : SCEVs)
+  for (const SCEV *Expr : SCEVs) {
     findValues(Expr, SE, SubtreeValues);
+    findLoops(Expr, Loops);
+  }
+
+  Loops.remove_if([this](const Loop *L) {
+    return S.contains(L) || L->contains(S.getEntry());
+  });
 
   for (auto &SAI : S.arrays())
     SubtreeValues.remove(SAI->getBasePtr());
@@ -1458,7 +1476,7 @@ GPUNodeBuilder::getReferencesInKernel(pp
     else
       ReplacedValues.insert(It->second);
   }
-  return std::make_pair(ReplacedValues, ValidSubtreeFunctions);
+  return std::make_tuple(ReplacedValues, ValidSubtreeFunctions, Loops);
 }
 
 void GPUNodeBuilder::clearDominators(Function *F) {
@@ -1702,7 +1720,9 @@ void GPUNodeBuilder::createKernel(__isl_
 
   SetVector<Value *> SubtreeValues;
   SetVector<Function *> SubtreeFunctions;
-  std::tie(SubtreeValues, SubtreeFunctions) = getReferencesInKernel(Kernel);
+  SetVector<const Loop *> Loops;
+  std::tie(SubtreeValues, SubtreeFunctions, Loops) =
+      getReferencesInKernel(Kernel);
 
   assert(Kernel->tree && "Device AST of kernel node is empty");
 
@@ -1712,8 +1732,6 @@ void GPUNodeBuilder::createKernel(__isl_
   BlockGenerator::AllocaMapTy HostScalarMap = ScalarMap;
   ScalarMap.clear();
 
-  SetVector<const Loop *> Loops;
-
   // Create for all loops we depend on values that contain the current loop
   // iteration. These values are necessary to generate code for SCEVs that
   // depend on such loops. As a result we need to pass them to the subfunction.

Added: polly/trunk/test/GPGPU/loops-outside-scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/loops-outside-scop.ll?rev=310193&view=auto
==============================================================================
--- polly/trunk/test/GPGPU/loops-outside-scop.ll (added)
+++ polly/trunk/test/GPGPU/loops-outside-scop.ll Sat Aug  5 19:39:05 2017
@@ -0,0 +1,67 @@
+; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s -check-prefix=SCOP
+
+; The output of this run is not checked with FileCheck because we only want to make sure that this doesn't crash.
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-fail-on-verify-module-failure \
+; RUN: -disable-output < %s
+
+; REQUIRES: pollyacc
+
+; Due to the existence of the `fence` call, we can only detect the inner loop
+; and not the outer loop. PPCGCodeGeneration had not implemented this case.
+; The fix was to pull the implementation from `IslNodeBuilder`.
+
+; Make sure that we only capture the inner loop
+; SCOP:      Function: f
+; SCOP-NEXT: Region: %for2.body---%for2.body.fence
+; SCOP-NEXT: Max Loop Depth:  1
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @fn_to_fence(i32 *%val)
+
+; void f(int *arr, bool shouldcont) {
+;     for(int i = 0; ; i++) {
+;         for(int j = 0; j < 10; j++) {
+;             arr[j] = i;
+;         }
+;         fence(arr);
+;         if (!shouldcont) break;
+;     }
+; }
+
+
+; Function Attrs: nounwind uwtable
+define void @f(i32 *%arr, i1 %shouldcont) #1 {
+entry:
+  br label %for.init
+
+for.init:                                             ; preds = %for.end, %entry.split
+  %i = phi i32 [ %i.next, %for.end ], [ 0, %entry ]
+  br label %for2.body
+
+for2.body:                                             ; preds = %"65", %"64"
+  %j = phi i32 [ %j.next, %for2.body ], [ 0, %for.init ]
+  %j.sext = sext i32 %j to i64
+  %arr.slot = getelementptr i32, i32* %arr, i64 %j.sext
+  store i32 %i, i32* %arr.slot, align 4
+  %exitcond = icmp eq i32 %j, 10
+  %j.next = add i32 %j, 1
+  br i1 %exitcond, label %for2.body.fence, label %for2.body
+
+for2.body.fence:                                             ; preds = %"65"
+  call void @fn_to_fence(i32* %arr) #2
+  br i1 %shouldcont, label %for.end, label %exit
+for.end:                                             ; preds = %"69"
+  %i.next = add i32 %i, 1
+  br label %for.init
+
+exit:                                             ; preds = %"69"
+  ret void
+
+}
+
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind uwtable }
+attributes #2 = { nounwind }




More information about the llvm-commits mailing list