[llvm] r312930 - Unmerge GEPs to reduce register pressure on IndirectBr edges.

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 10:52:09 PDT 2017


Author: yamauchi
Date: Mon Sep 11 10:52:08 2017
New Revision: 312930

URL: http://llvm.org/viewvc/llvm-project?rev=312930&view=rev
Log:
Unmerge GEPs to reduce register pressure on IndirectBr edges.

Summary:
GEP merging can sometimes increase the number of live values and the register
pressure across control-flow edges, and cause performance problems, particularly
if the increased register pressure results in spills.

This change implements GEP unmerging around an IndirectBr in certain cases to
mitigate the issue. It is done in the CodeGenPrepare pass (i.e. after all the
GEP merging has happened).

With this patch, the Python interpreter loop runs faster by ~5%.
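
For illustration, here is a minimal hand-written IR sketch of the
transformation (the function, value, and block names are made up for this
example and are not taken from the patch or its test):

  define i8 @sketch(i8** %pp, i8* %target) {
  entry:
    %p = load i8*, i8** %pp                ; the shared GEP operand ("%GEPIOp")
    %q1 = getelementptr i8, i8* %p, i64 1  ; "%GEPI": live past the indirectbr
    indirectbr i8* %target, [label %dst]

  dst:
    %a = load i8, i8* %q1
    %q2 = getelementptr i8, i8* %p, i64 3  ; "%UGEPI": the only use that keeps
                                           ; %p alive on the indirectbr edge
    %b = load i8, i8* %q2
    %s = add i8 %a, %b
    ret i8 %s
  }

After unmerging, %q2 is rebased onto %q1 with the index difference (3 - 1):

    %q2 = getelementptr i8, i8* %q1, i64 2

so %p is no longer live on the indirectbr edge, while %q1 stays live as before.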

Reviewers: sanjoy, hfinkel

Reviewed By: hfinkel

Subscribers: eastig, junbuml, llvm-commits

Differential Revision: https://reviews.llvm.org/D36772

Added:
    llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll
Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=312930&r1=312929&r2=312930&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Sep 11 10:52:08 2017
@@ -6189,6 +6189,170 @@ static bool splitMergedValStore(StoreIns
   return true;
 }
 
+// Return true if the GEP has exactly two operands (the pointer and a single
+// index), the indexed type is sequential, and the index is a constant integer.
+static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
+  gep_type_iterator I = gep_type_begin(*GEP);
+  return GEP->getNumOperands() == 2 &&
+      I.isSequential() &&
+      isa<ConstantInt>(GEP->getOperand(1));
+}
+
+// Try unmerging GEPs to reduce liveness interference (register pressure) across
+// IndirectBr edges. Since an IndirectBr tends to have many destination blocks,
+// reducing liveness interference across those edges benefits global register
+// allocation. Currently this handles only certain cases.
+//
+// For example, unmerge %GEPI and %UGEPI as below.
+//
+// ---------- BEFORE ----------
+// SrcBlock:
+//   ...
+//   %GEPIOp = ...
+//   ...
+//   %GEPI = gep %GEPIOp, Idx
+//   ...
+//   indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
+//   (* %GEPI is alive on the indirectbr edges due to other uses ahead)
+//   (* %GEPIOp is alive on the indirectbr edges only because it is used by
+//   %UGEPI)
+//
+// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
+// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
+// ...
+//
+// DstBi:
+//   ...
+//   %UGEPI = gep %GEPIOp, UIdx
+// ...
+// ---------------------------
+//
+// ---------- AFTER ----------
+// SrcBlock:
+//   ... (same as above)
+//    (* %GEPI is still alive on the indirectbr edges)
+//    (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
+//    unmerging)
+// ...
+//
+// DstBi:
+//   ...
+//   %UGEPI = gep %GEPI, (UIdx-Idx)
+//   ...
+// ---------------------------
+//
+// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
+// no longer alive on them.
+//
+// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting the
+// merging of GEPs in the first place in InstCombiner::visitGetElementPtrInst(),
+// so as not to lose the further simplifications and optimizations that GEP
+// merging enables.
+//
+// Note that this unmerging may increase the length of the data-flow critical
+// path (the path from %GEPIOp to %UGEPI now goes through %GEPI), so this is a
+// tradeoff between register pressure and the length of the data-flow critical
+// path. Restricting the transformation to the uncommon IndirectBr case
+// minimizes the impact of a potentially longer critical path, if any, as well
+// as the impact on compile time.
+static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
+                                             const TargetTransformInfo *TTI) {
+  BasicBlock *SrcBlock = GEPI->getParent();
+  // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
+  // (non-IndirectBr) cases exit early here.
+  if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
+    return false;
+  // Check that GEPI is a simple gep with a single constant index.
+  if (!GEPSequentialConstIndexed(GEPI))
+    return false;
+  ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
+  // Check that the constant index of GEPI is cheap to materialize.
+  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+      > TargetTransformInfo::TCC_Basic)
+    return false;
+  Value *GEPIOp = GEPI->getOperand(0);
+  // Check that GEPIOp is an instruction that's also defined in SrcBlock.
+  if (!isa<Instruction>(GEPIOp))
+    return false;
+  auto *GEPIOpI = cast<Instruction>(GEPIOp);
+  if (GEPIOpI->getParent() != SrcBlock)
+    return false;
+  // Check that GEPI is used outside the block, meaning it is alive on the
+  // IndirectBr edge(s).
+  if (find_if(GEPI->users(), [&](User *Usr) {
+        if (auto *I = dyn_cast<Instruction>(Usr)) {
+          if (I->getParent() != SrcBlock) {
+            return true;
+          }
+        }
+        return false;
+      }) == GEPI->users().end())
+    return false;
+  // The GEPs to be unmerged, i.e. rebased onto GEPI.
+  std::vector<GetElementPtrInst *> UGEPIs;
+  // Check each user of GEPIOp to see whether unmerging would make GEPIOp no
+  // longer alive on the IndirectBr edges.
+  for (User *Usr : GEPIOp->users()) {
+    if (Usr == GEPI) continue;
+    // Check if Usr is an Instruction. If not, give up.
+    if (!isa<Instruction>(Usr))
+      return false;
+    auto *UI = cast<Instruction>(Usr);
+    // If Usr is in the same block as GEPIOp, that is fine; skip it.
+    if (UI->getParent() == SrcBlock)
+      continue;
+    // Check if Usr is a GEP. If not, give up.
+    if (!isa<GetElementPtrInst>(Usr))
+      return false;
+    auto *UGEPI = cast<GetElementPtrInst>(Usr);
+    // Check that UGEPI is a simple gep with a single constant index and that
+    // GEPIOp is its pointer operand. If so, record it in the vector; if not,
+    // give up.
+    if (!GEPSequentialConstIndexed(UGEPI))
+      return false;
+    if (UGEPI->getOperand(0) != GEPIOp)
+      return false;
+    if (GEPIIdx->getType() !=
+        cast<ConstantInt>(UGEPI->getOperand(1))->getType())
+      return false;
+    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+        > TargetTransformInfo::TCC_Basic)
+      return false;
+    UGEPIs.push_back(UGEPI);
+  }
+  if (UGEPIs.size() == 0)
+    return false;
+  // Check the materialization cost of (UIdx-Idx).
+  for (GetElementPtrInst *UGEPI : UGEPIs) {
+    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+    APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
+    unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+    if (ImmCost > TargetTransformInfo::TCC_Basic)
+      return false;
+  }
+  // Now unmerge: rebase each UGEPI onto GEPI.
+  for (GetElementPtrInst *UGEPI : UGEPIs) {
+    UGEPI->setOperand(0, GEPI);
+    ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
+    Constant *NewUGEPIIdx =
+        ConstantInt::get(GEPIIdx->getType(),
+                         UGEPIIdx->getValue() - GEPIIdx->getValue());
+    UGEPI->setOperand(1, NewUGEPIIdx);
+    // If GEPI is not inbounds but UGEPI is inbounds, clear UGEPI's inbounds
+    // flag to avoid UB.
+    if (!GEPI->isInBounds()) {
+      UGEPI->setIsInBounds(false);
+    }
+  }
+  // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
+  // alive on IndirectBr edges).
+  assert(find_if(GEPIOp->users(), [&](User *Usr) {
+        return cast<Instruction>(Usr)->getParent() != SrcBlock;
+      }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
+  return true;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -6302,6 +6466,9 @@ bool CodeGenPrepare::optimizeInst(Instru
       optimizeInst(NC, ModifiedDT);
       return true;
     }
+    if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
+      return true;
+    }
     return false;
   }
 

Added: llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll?rev=312930&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/gep-unmerging.ll Mon Sep 11 10:52:08 2017
@@ -0,0 +1,60 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+ at exit_addr = constant i8* blockaddress(@gep_unmerging, %exit)
+ at op1_addr = constant i8* blockaddress(@gep_unmerging, %op1)
+ at op2_addr = constant i8* blockaddress(@gep_unmerging, %op2)
+ at op3_addr = constant i8* blockaddress(@gep_unmerging, %op3)
+ at dummy = global i8 0
+
+define void @gep_unmerging(i1 %pred, i8* %p0) {
+entry:
+  %table = alloca [256 x i8*]
+  %table_0 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 0
+  %table_1 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 1
+  %table_2 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 2
+  %table_3 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 3
+  %exit_a = load i8*, i8** @exit_addr
+  %op1_a = load i8*, i8** @op1_addr
+  %op2_a = load i8*, i8** @op2_addr
+  %op3_a = load i8*, i8** @op3_addr
+  store i8* %exit_a, i8** %table_0
+  store i8* %op1_a, i8** %table_1
+  store i8* %op2_a, i8** %table_2
+  store i8* %op3_a, i8** %table_3
+  br label %indirectbr
+
+op1:
+; CHECK-LABEL: op1:
+; CHECK-NEXT: %p1_inc2 = getelementptr i8, i8* %p_postinc, i64 2
+; CHECK-NEXT: %p1_inc1 = getelementptr i8, i8* %p_postinc, i64 1
+  %p1_inc2 = getelementptr i8, i8* %p_preinc, i64 3
+  %p1_inc1 = getelementptr i8, i8* %p_preinc, i64 2
+  %a10 = load i8, i8* %p_postinc
+  %a11 = load i8, i8* %p1_inc1
+  %a12 = add i8 %a10, %a11
+  store i8 %a12, i8* @dummy
+  br i1 %pred, label %indirectbr, label %exit
+
+op2:
+; CHECK-LABEL: op2:
+; CHECK-NEXT: %p2_inc = getelementptr i8, i8* %p_postinc, i64 1
+  %p2_inc = getelementptr i8, i8* %p_preinc, i64 2
+  %a2 = load i8, i8* %p_postinc
+  store i8 %a2, i8* @dummy
+  br i1 %pred, label %indirectbr, label %exit
+
+op3:
+  br i1 %pred, label %indirectbr, label %exit
+
+indirectbr:
+  %p_preinc = phi i8* [%p0, %entry], [%p1_inc2, %op1], [%p2_inc, %op2], [%p_postinc, %op3]
+  %p_postinc = getelementptr i8, i8* %p_preinc, i64 1
+  %next_op = load i8, i8* %p_preinc
+  %p_zext = zext i8 %next_op to i64
+  %slot = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 %p_zext 
+  %target = load i8*, i8** %slot
+  indirectbr i8* %target, [label %exit, label %op1, label %op2]
+
+exit:
+  ret void
+}



