[polly] r205320 - Delete trivial PHI nodes (aka stack slot sharing)

Tobias Grosser tobias at grosser.es
Tue Apr 1 09:01:33 PDT 2014


Author: grosser
Date: Tue Apr  1 11:01:33 2014
New Revision: 205320

URL: http://llvm.org/viewvc/llvm-project?rev=205320&view=rev
Log:
Delete trivial PHI nodes (aka stack slot sharing)

During code preperation trivial PHI nodes (mainly introduced by lcssa) are
deleted to decrease the number of introduced allocas (==> dependences). However
simply replacing them by their only incoming value would cause the independent
block pass to introduce new allocas. To prevent this we try to share stack slots
during code preperarion, hence to reuse a already created alloca 'to demote' the
trivial PHI node. This works if we know that the value stored in this alloca
will be the incoming value of the trivial PHI at the end of the predecessor
block of this trivial PHI.

Contributed-by: Johannes Doerfert <doerfert at cs.uni-saarland.de>

Added:
    polly/trunk/test/CodePreparation/multiple_loops_trivial_phis.ll
    polly/trunk/test/CodePreparation/single_loop_trivial_phi.ll
Modified:
    polly/trunk/lib/Transform/CodePreparation.cpp

Modified: polly/trunk/lib/Transform/CodePreparation.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/CodePreparation.cpp?rev=205320&r1=205319&r2=205320&view=diff
==============================================================================
--- polly/trunk/lib/Transform/CodePreparation.cpp (original)
+++ polly/trunk/lib/Transform/CodePreparation.cpp Tue Apr  1 11:01:33 2014
@@ -49,6 +49,28 @@ using namespace llvm;
 using namespace polly;
 
 namespace {
+
+// Helper function which (for a given PHI node):
+//
+// 1) Remembers all incoming values and the associated basic blocks
+// 2) Demotes the phi node to the stack
+// 3) Remembers the correlation between the PHI node and the new alloca
+//
+// When we combine the information from 1) and 3) we know the values stored
+// in this alloca at the end of the predecessor basic blocks of the PHI.
+static void DemotePHI(
+    PHINode *PN, DenseMap<PHINode *, AllocaInst *> &PNallocMap,
+    DenseMap<std::pair<Value *, BasicBlock *>, PHINode *> &ValueLocToPhiMap) {
+
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    auto *InVal = PN->getIncomingValue(i);
+    auto *InBB = PN->getIncomingBlock(i);
+    ValueLocToPhiMap[std::make_pair(InVal, InBB)] = PN;
+  }
+
+  PNallocMap[PN] = DemotePHIToStack(PN);
+}
+
 /// @brief Prepare the IR for the scop detection.
 ///
 class CodePreparation : public FunctionPass {
@@ -84,12 +106,23 @@ void CodePreparation::clear() {}
 CodePreparation::~CodePreparation() { clear(); }
 
 bool CodePreparation::eliminatePHINodes(Function &F) {
-  // The PHINodes that will be deleted.
+  // The PHINodes that will be demoted.
+  std::vector<PHINode *> PNtoDemote;
+  // The PHINodes that will be deleted (stack slot sharing).
   std::vector<PHINode *> PNtoDelete;
   // The PHINodes that will be preserved.
-  std::vector<PHINode *> PreservedPNs;
-
-  // Scan the PHINodes in this function.
+  std::vector<PHINode *> PNtoPreserve;
+  // Map to remember values stored in PHINodes at the end of basic blocks.
+  DenseMap<std::pair<Value *, BasicBlock *>, PHINode *> ValueLocToPhiMap;
+  // Map from PHINodes to their alloca (after demotion) counterpart.
+  DenseMap<PHINode *, AllocaInst *> PNallocMap;
+
+  // Scan the PHINodes in this function and categorize them to be either:
+  // o Preserved, if they are (canonical) induction variables or can be
+  //              synthesized during code generation ('SCEVable')
+  // o Deleted, if they are trivial PHI nodes (one incoming value) and the
+  //            incoming value is a PHI node we will demote
+  // o Demoted, if they do not fit any of the previous categories
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI)
     for (BasicBlock::iterator II = BI->begin(), IE = BI->getFirstNonPHI();
          II != IE; ++II) {
@@ -98,7 +131,7 @@ bool CodePreparation::eliminatePHINodes(
         if (SE->isSCEVable(PN->getType())) {
           const SCEV *S = SE->getSCEV(PN);
           if (!isa<SCEVUnknown>(S) && !isa<SCEVCouldNotCompute>(S)) {
-            PreservedPNs.push_back(PN);
+            PNtoPreserve.push_back(PN);
             continue;
           }
         }
@@ -106,7 +139,7 @@ bool CodePreparation::eliminatePHINodes(
         if (Loop *L = LI->getLoopFor(BI)) {
           // Induction variables will be preserved.
           if (L->getCanonicalInductionVariable() == PN) {
-            PreservedPNs.push_back(PN);
+            PNtoPreserve.push_back(PN);
             continue;
           }
         }
@@ -114,26 +147,58 @@ bool CodePreparation::eliminatePHINodes(
 
       // As DemotePHIToStack does not support invoke edges, we preserve
       // PHINodes that have invoke edges.
-      if (hasInvokeEdge(PN))
-        PreservedPNs.push_back(PN);
-      else
-        PNtoDelete.push_back(PN);
+      if (hasInvokeEdge(PN)) {
+        PNtoPreserve.push_back(PN);
+      } else {
+        if (PN->getNumIncomingValues() == 1)
+          PNtoDelete.push_back(PN);
+        else
+          PNtoDemote.push_back(PN);
+      }
     }
 
-  if (PNtoDelete.empty())
+  if (PNtoDemote.empty() && PNtoDelete.empty())
     return false;
 
-  while (!PNtoDelete.empty()) {
-    PHINode *PN = PNtoDelete.back();
-    PNtoDelete.pop_back();
+  while (!PNtoDemote.empty()) {
+    PHINode *PN = PNtoDemote.back();
+    PNtoDemote.pop_back();
+    DemotePHI(PN, PNallocMap, ValueLocToPhiMap);
+  }
+
+  // For each trivial PHI we encountered (and we want to delete) we try to find
+  // the value it will hold in a alloca we already created by PHI demotion. If
+  // we succeed (the incoming value is stored in an alloca at the predecessor
+  // block), we can replace the trivial PHI by the value stored in the alloca.
+  // If not, we will demote this trivial PHI as any other one.
+  for (auto PNIt = PNtoDelete.rbegin(), PNEnd = PNtoDelete.rend();
+       PNIt != PNEnd; ++PNIt) {
+    PHINode *TrivPN = *PNIt;
+    assert(TrivPN->getNumIncomingValues() == 1 && "Assumed trivial PHI");
+
+    auto *InVal = TrivPN->getIncomingValue(0);
+    auto *InBB = TrivPN->getIncomingBlock(0);
+    const auto &ValLocIt = ValueLocToPhiMap.find(std::make_pair(InVal, InBB));
+    if (ValLocIt != ValueLocToPhiMap.end()) {
+      PHINode *InPHI = ValLocIt->second;
+      assert(PNallocMap.count(InPHI) &&
+             "Inconsitent state, PN was not demoted!");
+      auto *InPHIAlloca = PNallocMap[InPHI];
+      PNallocMap[TrivPN] = InPHIAlloca;
+      LoadInst *LI = new LoadInst(InPHIAlloca, "",
+                                  TrivPN->getParent()->getFirstInsertionPt());
+      TrivPN->replaceAllUsesWith(LI);
+      TrivPN->eraseFromParent();
+      continue;
+    }
 
-    DemotePHIToStack(PN);
+    DemotePHI(TrivPN, PNallocMap, ValueLocToPhiMap);
   }
 
   // Move preserved PHINodes to the beginning of the BasicBlock.
-  while (!PreservedPNs.empty()) {
-    PHINode *PN = PreservedPNs.back();
-    PreservedPNs.pop_back();
+  while (!PNtoPreserve.empty()) {
+    PHINode *PN = PNtoPreserve.back();
+    PNtoPreserve.pop_back();
 
     BasicBlock *BB = PN->getParent();
     if (PN == BB->begin())

Added: polly/trunk/test/CodePreparation/multiple_loops_trivial_phis.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodePreparation/multiple_loops_trivial_phis.ll?rev=205320&view=auto
==============================================================================
--- polly/trunk/test/CodePreparation/multiple_loops_trivial_phis.ll (added)
+++ polly/trunk/test/CodePreparation/multiple_loops_trivial_phis.ll Tue Apr  1 11:01:33 2014
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s
+; ModuleID = 'multiple_loops_trivial_phis.ll'
+;
+; int f(int * __restrict__ A) {
+;   int i, j, sum = 0;
+;   for (i = 0; i < 100; i++) {
+;     sum *= 2;
+;     for (j = 0; j < 100; j++) {
+;       sum += A[i+j];
+;     }
+;   }
+;   return sum;
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @f(i32* noalias %A) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc5
+  %sum.04 = phi i32 [ 0, %entry ], [ %add4.lcssa, %for.inc5 ]
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %2, %for.inc5 ]
+  %mul = shl nsw i32 %sum.04, 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %for.inc
+  %sum.12 = phi i32 [ %mul, %for.body ], [ %add4, %for.inc ]
+  %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
+  %0 = add i64 %indvars.iv23, %indvars.iv1
+  %arrayidx = getelementptr i32* %A, i64 %0
+  %tmp5 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, %sum.12
+  %1 = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond5 = icmp eq i64 %1, 100
+  br i1 %exitcond5, label %for.inc5, label %for.inc
+
+for.inc5:                                         ; preds = %for.inc
+  %add4.lcssa = phi i32 [ %add4, %for.inc ]
+  %2 = add nuw nsw i64 %indvars.iv23, 1
+  %exitcond = icmp eq i64 %2, 100
+  br i1 %exitcond, label %for.end7, label %for.body
+
+for.end7:                                         ; preds = %for.inc5
+  %add4.lcssa.lcssa = phi i32 [ %add4.lcssa, %for.inc5 ]
+  ret i32 %add4.lcssa.lcssa
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; Verify that only two allocas are created. (instead of 4!)
+; CHECK: alloca
+; CHECK: alloca
+; CHECK-NOT: alloca

Added: polly/trunk/test/CodePreparation/single_loop_trivial_phi.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodePreparation/single_loop_trivial_phi.ll?rev=205320&view=auto
==============================================================================
--- polly/trunk/test/CodePreparation/single_loop_trivial_phi.ll (added)
+++ polly/trunk/test/CodePreparation/single_loop_trivial_phi.ll Tue Apr  1 11:01:33 2014
@@ -0,0 +1,50 @@
+; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s
+; ModuleID = 'single_loop_trivial_phi.ll'
+;
+; int f(int *A, int N) {
+;   int i, sum = 0;
+;   for (i = 0; i < N; i++)
+;     sum += A[i];
+;   return sum;
+; }
+; ModuleID = 'stack-slots.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @f(i32* %A, i32 %N) #0 {
+entry:
+  %cmp1 = icmp sgt i32 %N, 0
+  br i1 %cmp1, label %for.inc.lr.ph, label %for.end
+
+for.inc.lr.ph:                                    ; preds = %entry
+  %0 = zext i32 %N to i64
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc.lr.ph, %for.inc
+  %sum.03 = phi i32 [ 0, %for.inc.lr.ph ], [ %add, %for.inc ]
+  %indvars.iv2 = phi i64 [ 0, %for.inc.lr.ph ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr i32* %A, i64 %indvars.iv2
+  %tmp1 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp1, %sum.03
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %0
+  br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  %add.lcssa = phi i32 [ %add, %for.inc ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; Verify that only two allocas are created.
+; Both are needed for the %sum.0 PHI node and none should be created for the
+; %sum.0.lcssa PHI node
+; CHECK: alloca
+; CHECK: alloca
+; CHECK-NOT: alloca





More information about the llvm-commits mailing list