[PATCH] D45008: [NVPTX] Disable block placement tail duplciation in structured CFG.

Tim Shen via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 28 17:39:08 PDT 2018


timshen created this revision.
timshen added reviewers: jlebar, arsenm.
Herald added subscribers: hiraditya, tpr, wdng, sanjoy, jholewinski.

Tail duplication easily breaks the structure of CFG, e.g. duplicating on
a region entry. If the structure is intended to be preserved, then we
may want to configure tail duplication, or disable it for structured
CFG. From our benchmark results disabling it doesn't cause performance
regression.

Also add a temporary flag to "roll back" the behavior for easy deployment.

This fixes several internal Nvidia GPU test failures that we suspect to be
ptxas miscompiles (PR27738).

Notice that this also affects AMDGPU backend.

All unit tests still pass.


https://reviews.llvm.org/D45008

Files:
  llvm/lib/CodeGen/MachineBlockPlacement.cpp
  llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp


Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -44,6 +44,14 @@
                                cl::desc("Disable load/store vectorizer"),
                                cl::init(false), cl::Hidden);
 
+// TODO: Remove this flag when we are confident with no regressions.
+static cl::opt<bool> DisableRequireStructuredCFG(
+    "disable-nvptx-require-structured-cfg",
+    cl::desc("Transitional flag to turn ofo NVPTX's requirement on preserving "
+             "structured CFG. The requirement should be disabled only when "
+             "unexpected regressions happen."),
+    cl::init(false), cl::Hidden);
+
 namespace llvm {
 
 void initializeNVVMIntrRangePass(PassRegistry&);
@@ -108,6 +116,8 @@
     drvInterface = NVPTX::NVCL;
   else
     drvInterface = NVPTX::CUDA;
+  if (!DisableRequireStructuredCFG)
+    setRequiresStructuredCFG(true);
   initAsmInfo();
 }
 
Index: llvm/lib/CodeGen/MachineBlockPlacement.cpp
===================================================================
--- llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -513,6 +513,11 @@
 
   bool runOnMachineFunction(MachineFunction &F) override;
 
+  bool allowTailDupPlacement() const {
+    assert(F);
+    return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
+  }
+
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
@@ -1018,7 +1023,7 @@
     MachineBasicBlock *Succ1 = BestA.Dest;
     MachineBasicBlock *Succ2 = BestB.Dest;
     // Check to see if tail-duplication would be profitable.
-    if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
+    if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
         canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
         isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
                               Chain, BlockFilter)) {
@@ -1044,7 +1049,7 @@
   return Result;
 }
 
-/// When the option TailDupPlacement is on, this method checks if the
+/// When the option allowTailDupPlacement() is on, this method checks if the
 /// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
 /// into all of its unplaced, unfiltered predecessors, that are not BB.
 bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
@@ -1493,7 +1498,7 @@
     if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
                                    Chain, BlockFilter)) {
       // If tail duplication would make Succ profitable, place it.
-      if (TailDupPlacement && shouldTailDuplicate(Succ))
+      if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
         DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
       continue;
     }
@@ -1702,7 +1707,7 @@
     auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
     MachineBasicBlock* BestSucc = Result.BB;
     bool ShouldTailDup = Result.ShouldTailDup;
-    if (TailDupPlacement)
+    if (allowTailDupPlacement())
       ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
 
     // If an immediate successor isn't available, look for the best viable
@@ -1724,7 +1729,7 @@
 
     // Placement may have changed tail duplication opportunities.
     // Check for that now.
-    if (TailDupPlacement && BestSucc && ShouldTailDup) {
+    if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
       // If the chosen successor was duplicated into all its predecessors,
       // don't bother laying it out, just go round the loop again with BB as
       // the chain end.
@@ -2758,7 +2763,7 @@
       TailDupSize = TailDupPlacementAggressiveThreshold;
   }
 
-  if (TailDupPlacement) {
+  if (allowTailDupPlacement()) {
     MPDT = &getAnalysis<MachinePostDominatorTree>();
     if (MF.getFunction().optForSize())
       TailDupSize = 1;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45008.140164.patch
Type: text/x-patch
Size: 4074 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180329/b388fdd3/attachment.bin>


More information about the llvm-commits mailing list