[polly] d5c8716 - [Polly] Use VirtualUse to determine references.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 26 01:32:16 PDT 2021


Author: Michael Kruse
Date: 2021-09-26T03:26:43-05:00
New Revision: d5c87162db7763c89160dc66894bebf3bd1e90d7

URL: https://github.com/llvm/llvm-project/commit/d5c87162db7763c89160dc66894bebf3bd1e90d7
DIFF: https://github.com/llvm/llvm-project/commit/d5c87162db7763c89160dc66894bebf3bd1e90d7.diff

LOG: [Polly] Use VirtualUse to determine references.

VirtualUse ensures consistency over the different sources of values
within Polly. In particular, it correctly handles instructions that have
been moved between statements. Before this patch, the code wrongly
assumed that a basic block's instructions are also the ScopStmt's
instructions. References are determined for OpenMP outlining and GPGPU
kernel extraction.
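
As a minimal, self-contained C++ sketch of this classification pattern
(UseKind and Operand are hypothetical stand-ins for
polly::VirtualUse::Kind and an instruction operand; Polly's GlobalMap
remapping is elided):

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Hypothetical stand-in for polly::VirtualUse::Kind.
enum class UseKind { Constant, Synthesizable, Block, ReadOnly, Hoisted, Intra, Inter };

// Hypothetical stand-in for an operand of a statement's instruction.
struct Operand {
  std::string Name;
  UseKind Kind;
  bool IsGlobalAddress = false; // e.g. a GlobalValue when offloading
};

// Classify each operand by how an outlined function or GPU kernel can
// obtain its value: synthesizable values are recomputed from their SCEV
// expression, plain constants need no reference at all, global addresses
// still have to be transferred, and everything else is forwarded as an
// explicit reference.
void collectReferences(const std::vector<Operand> &Ops,
                       std::set<std::string> &Values,
                       std::set<std::string> &SCEVs) {
  for (const Operand &Op : Ops) {
    switch (Op.Kind) {
    case UseKind::Constant:
      if (Op.IsGlobalAddress)
        Values.insert(Op.Name);
      break;
    case UseKind::Synthesizable:
      SCEVs.insert(Op.Name);
      break;
    default: // Block, ReadOnly, Hoisted, Intra, Inter
      Values.insert(Op.Name);
      break;
    }
  }
}

int main() {
  std::set<std::string> Values, SCEVs;
  collectReferences({{"i64 42", UseKind::Constant},
                     {"@global", UseKind::Constant, true},
                     {"%iv", UseKind::Synthesizable},
                     {"%hoisted.load", UseKind::Hoisted}},
                    Values, SCEVs);
  for (const std::string &V : Values)
    std::cout << "value reference: " << V << '\n'; // %hoisted.load, @global
  for (const std::string &S : SCEVs)
    std::cout << "scev reference:  " << S << '\n'; // %iv
}

In IslNodeBuilder the same per-operand decision is made by
VirtualUse::create() before the remaining values are remapped through
GlobalMap, as in the diff below.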

GPGPU CodeGen had two problems. First, it generated GPU kernel
parameters for constants. Second, it emitted GPU-side invariant loads
for values that the host had already loaded. The latter is only
partially fixed: a store of the invariant load result is still
generated, but it now uses the value the host has already written.

WARNING: I did not test the generated PollyACC code on an actual GPU.

The next patch will make use of the improved consistency.

Added: 
    

Modified: 
    polly/include/polly/CodeGen/IslNodeBuilder.h
    polly/lib/CodeGen/IslNodeBuilder.cpp
    polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
    polly/test/GPGPU/invariant-load-of-scalar.ll
    polly/test/GPGPU/phi-nodes-in-kernel.ll

Removed: 
    


################################################################################
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h
index ee0a1e58ae86..450d63c286e9 100644
--- a/polly/include/polly/CodeGen/IslNodeBuilder.h
+++ b/polly/include/polly/CodeGen/IslNodeBuilder.h
@@ -58,7 +58,7 @@ struct SubtreeReferences {
 ///                         SubtreeReferences structure.
 /// @param CreateScalarRefs Should the result include allocas of scalar
 ///                         references?
-void addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
+void addReferencesFromStmt(ScopStmt *Stmt, void *UserPtr,
                            bool CreateScalarRefs = true);
 
 class IslNodeBuilder {

diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 596c576c5666..06fb1c775f69 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -24,6 +24,7 @@
 #include "polly/Support/ISLTools.h"
 #include "polly/Support/SCEVValidator.h"
 #include "polly/Support/ScopHelper.h"
+#include "polly/Support/VirtualInstruction.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
@@ -205,40 +206,68 @@ int IslNodeBuilder::getNumberOfIterations(isl::ast_node_for For) {
     return NumberIterations + 1;
 }
 
-/// Extract the values and SCEVs needed to generate code for a block.
-static int findReferencesInBlock(struct SubtreeReferences &References,
-                                 const ScopStmt *Stmt, BasicBlock *BB) {
-  for (Instruction &Inst : *BB) {
-    // Include invariant loads
-    if (isa<LoadInst>(Inst))
-      if (Value *InvariantLoad = References.GlobalMap.lookup(&Inst))
-        References.Values.insert(InvariantLoad);
-
-    for (Value *SrcVal : Inst.operands()) {
-      auto *Scope = References.LI.getLoopFor(BB);
-      if (canSynthesize(SrcVal, References.S, &References.SE, Scope)) {
-        References.SCEVs.insert(References.SE.getSCEVAtScope(SrcVal, Scope));
-        continue;
-      } else if (Value *NewVal = References.GlobalMap.lookup(SrcVal))
-        References.Values.insert(NewVal);
+static void findReferencesByUse(Value *SrcVal, ScopStmt *UserStmt,
+                                Loop *UserScope, const ValueMapT &GlobalMap,
+                                SetVector<Value *> &Values,
+                                SetVector<const SCEV *> &SCEVs) {
+  VirtualUse VUse = VirtualUse::create(UserStmt, UserScope, SrcVal, true);
+  switch (VUse.getKind()) {
+  case VirtualUse::Constant:
+    // When accelerator-offloading, GlobalValue is a host address whose content
+    // must still be transferred to the GPU.
+    if (isa<GlobalValue>(SrcVal))
+      Values.insert(SrcVal);
+    break;
+
+  case VirtualUse::Synthesizable:
+    SCEVs.insert(VUse.getScevExpr());
+    return;
+
+  case VirtualUse::Block:
+  case VirtualUse::ReadOnly:
+  case VirtualUse::Hoisted:
+  case VirtualUse::Intra:
+  case VirtualUse::Inter:
+    break;
+  }
+
+  if (Value *NewVal = GlobalMap.lookup(SrcVal))
+    Values.insert(NewVal);
+}
+
+static void findReferencesInInst(Instruction *Inst, ScopStmt *UserStmt,
+                                 Loop *UserScope, const ValueMapT &GlobalMap,
+                                 SetVector<Value *> &Values,
+                                 SetVector<const SCEV *> &SCEVs) {
+  for (Use &U : Inst->operands())
+    findReferencesByUse(U.get(), UserStmt, UserScope, GlobalMap, Values, SCEVs);
+}
+
+static void findReferencesInStmt(ScopStmt *Stmt, SetVector<Value *> &Values,
+                                 ValueMapT &GlobalMap,
+                                 SetVector<const SCEV *> &SCEVs) {
+  LoopInfo *LI = Stmt->getParent()->getLI();
+
+  BasicBlock *BB = Stmt->getBasicBlock();
+  Loop *Scope = LI->getLoopFor(BB);
+  for (Instruction *Inst : Stmt->getInstructions())
+    findReferencesInInst(Inst, Stmt, Scope, GlobalMap, Values, SCEVs);
+
+  if (Stmt->isRegionStmt()) {
+    for (BasicBlock *BB : Stmt->getRegion()->blocks()) {
+      Loop *Scope = LI->getLoopFor(BB);
+      for (Instruction &Inst : *BB)
+        findReferencesInInst(&Inst, Stmt, Scope, GlobalMap, Values, SCEVs);
     }
   }
-  return 0;
 }
 
-void polly::addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
+void polly::addReferencesFromStmt(ScopStmt *Stmt, void *UserPtr,
                                   bool CreateScalarRefs) {
   auto &References = *static_cast<struct SubtreeReferences *>(UserPtr);
 
-  if (Stmt->isBlockStmt())
-    findReferencesInBlock(References, Stmt, Stmt->getBasicBlock());
-  else if (Stmt->isRegionStmt()) {
-    for (BasicBlock *BB : Stmt->getRegion()->blocks())
-      findReferencesInBlock(References, Stmt, BB);
-  } else {
-    assert(Stmt->isCopyStmt());
-    // Copy Stmts have no instructions that we need to consider.
-  }
+  findReferencesInStmt(Stmt, References.Values, References.GlobalMap,
+                       References.SCEVs);
 
   for (auto &Access : *Stmt) {
     if (References.ParamSpace) {
@@ -276,8 +305,8 @@ void polly::addReferencesFromStmt(const ScopStmt *Stmt, void *UserPtr,
 static void addReferencesFromStmtSet(isl::set Set,
                                      struct SubtreeReferences *UserPtr) {
   isl::id Id = Set.get_tuple_id();
-  auto *Stmt = static_cast<const ScopStmt *>(Id.get_user());
-  return addReferencesFromStmt(Stmt, UserPtr);
+  auto *Stmt = static_cast<ScopStmt *>(Id.get_user());
+  addReferencesFromStmt(Stmt, UserPtr);
 }
 
 /// Extract the out-of-scop values and SCEVs referenced from a union set

diff --git a/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
index 42772377c3cb..cd4fe6a4587b 100644
--- a/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
+++ b/polly/test/GPGPU/invariant-load-hoisting-read-in-kernel.ll
@@ -1,14 +1,24 @@
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-invariant-load-hoisting \
+; RUN: -S < %s | \
+; RUN: FileCheck -check-prefix=HOST-IR %s
+
 ; RUN: opt %loadPolly -disable-output -polly-acc-dump-kernel-ir \
 ; RUN: -polly-codegen-ppcg -polly-scops \
-; RUN: -polly-invariant-load-hoisting < %s | FileCheck %s
+; RUN: -polly-invariant-load-hoisting < %s | FileCheck -check-prefix=KERNEL-IR %s
 
 ; REQUIRES: pollyacc
 
 ; Verify that invariant loads used in a kernel statement are correctly forwarded
 ; as subtree value to the GPU kernel.
 
-; CHECK:  define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_0({{.*}} float %polly.access.p.load)
-; CHECK:   store float %polly.access.p.load, float* %indvar2f.phiops
+; HOST-IR: store float %polly.access.p.load, float* %invariant.preload.s2a, align 4
+
+; KERNEL-IR:  define ptx_kernel void @FUNC_foo_SCOP_0_KERNEL_2({{.*}}i8 addrspace(1)* %MemRef_indvar2f__phi{{.*}})
+; KERNEL-IR:   %indvar2f.phiops.reload = load float, float* %indvar2f.phiops, align 4
+; KERNEL-IR:   store float %indvar2f.phiops.reload, float addrspace(1)* %polly.access.MemRef_A, align 4
+
+; FIXME: store float %indvar2f.phiops.reload, float* %indvar2f.phiops, align 4
; For some reason the above instruction is emitted, storing the value back to the address it was just loaded from.
 
 define void @foo(float* %A, float* %p) {
 entry:
@@ -21,15 +31,15 @@ loop:
   %ptr = getelementptr float, float* %A, i64 %indvar
   store float 42.0, float* %ptr
   %cmp = icmp sle i64 %indvar, 1024
-  br i1 %cmp, label %loop, label %loop2
+  br i1 %cmp, label %loop, label %anotherloop
 
-loop2:
-  %indvar2 = phi i64 [0, %loop], [%indvar2.next, %loop2]
-  %indvar2f = phi float [%invariant, %loop], [%indvar2f, %loop2]
+anotherloop:
+  %indvar2 = phi i64 [0, %loop], [%indvar2.next, %anotherloop]
+  %indvar2f = phi float [%invariant, %loop], [%indvar2f, %anotherloop]
   %indvar2.next = add i64 %indvar2, 1
   store float %indvar2f, float* %A
   %cmp2 = icmp sle i64 %indvar2, 1024
-  br i1 %cmp2, label %loop2, label %end
+  br i1 %cmp2, label %anotherloop, label %end
 
 end:
   ret void

diff --git a/polly/test/GPGPU/invariant-load-of-scalar.ll b/polly/test/GPGPU/invariant-load-of-scalar.ll
index 900bdac88225..f1351167a576 100644
--- a/polly/test/GPGPU/invariant-load-of-scalar.ll
+++ b/polly/test/GPGPU/invariant-load-of-scalar.ll
@@ -38,8 +38,7 @@
 ; kernel function.
 ; KERNEL-IR: define ptx_kernel void @FUNC_checkPrivatization_SCOP_0_KERNEL_0
 ; KERNEL-IR-SAME: (i8 addrspace(1)* %MemRef_A, i32 %tmp,
-; KERNEL-IR-SAME: i32 %tmp2, i32 %polly.access.begin.load,
-; KERNEL-IR-SAME: i32 %polly.access.end.load)
+; KERNEL-IR-SAME: i32 %tmp2, i32 %polly.access.begin.load)
 
 
 ; void checkScalarPointerOffload(int A[], int *begin, int *end) {

diff --git a/polly/test/GPGPU/phi-nodes-in-kernel.ll b/polly/test/GPGPU/phi-nodes-in-kernel.ll
index 7e9b1e110b70..fd299d2c37be 100644
--- a/polly/test/GPGPU/phi-nodes-in-kernel.ll
+++ b/polly/test/GPGPU/phi-nodes-in-kernel.ll
@@ -52,9 +52,9 @@ target triple = "x86_64-unknown-linux-gnu"
 ; IR:       [[REGC:%.+]] =   bitcast i32* %{{[0-9]+}} to i8*
 ; IR-NEXT:  call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
 
-; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c, i32 %0) #0 {
-; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %10
-; KERNEL-IR-NEXT: store i32 %0, i32 addrspace(1)* %polly.access.MemRef_c, align 4
+; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c) #0 {
+; KERNEL-IR: %polly.access.MemRef_c = getelementptr i32, i32 addrspace(1)* %polly.access.cast.MemRef_c, i64 %9
+; KERNEL-IR-NEXT: store i32 422, i32 addrspace(1)* %polly.access.MemRef_c, align 4
 
 define void @kernel_dynprog([50 x i32]* %c) {
 entry:
@@ -75,7 +75,7 @@ for.cond15.for.cond12.loopexit_crit_edge:         ; preds = %for.body17
 for.body17:                                       ; preds = %for.body17, %for.cond1.preheader
   %indvars.iv71 = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next72, %for.body17 ]
   %arrayidx69 = getelementptr inbounds [50 x i32], [50 x i32]* %c, i64 0, i64 %indvars.iv71
-  store i32 undef, i32* %arrayidx69, align 4
+  store i32 422, i32* %arrayidx69, align 4
   %indvars.iv.next72 = add nuw nsw i64 %indvars.iv71, 1
   %lftr.wideiv74 = trunc i64 %indvars.iv.next72 to i32
   %exitcond75 = icmp ne i32 %lftr.wideiv74, 50


        

