[PATCH] D13417: [MachineCombiner] remove "slack" from critical path cost calculation (PR25016)

Sat Oct 3 14:21:59 PDT 2015

spatel created this revision.
spatel added reviewers: Gerolf, haicheng, hfinkel.
spatel added a subscriber: llvm-commits.

As noted on the dev list:
http://lists.llvm.org/pipermail/llvm-dev/2015-October/090998.html
and PR25016:
https://llvm.org/bugs/show_bug.cgi?id=25016

The MachineCombiner is doing reassociations that don't improve or even worsen the critical path. This is caused by inclusion of the "slack" factor when calculating the critical path of the original code sequence. If we don't add that, then we have a simple cost comparison of the old code sequence vs. a new sequence. It's not clear to me under what conditions "slack" would be useful in this comparison, so I'm proposing to simply remove it.

All existing correct reassociation tests are preserved after this change, and the two failing cases now have identical asm that does what we want:
a + b + c + d ---> (a + b) + (c + d)

http://reviews.llvm.org/D13417

Files:
  lib/CodeGen/MachineCombiner.cpp
  test/CodeGen/X86/machine-combiner.ll

Index: test/CodeGen/X86/machine-combiner.ll
===================================================================

--- test/CodeGen/X86/machine-combiner.ll
+++ test/CodeGen/X86/machine-combiner.ll
@@ -632,10 +632,10 @@
 ; AVX-NEXT:  callq   bar
 ; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
 ; AVX-NEXT:  callq   bar
-; AVX-NEXT:  vmovsd  (%rsp), %xmm1
-; AVX:       vaddsd  8(%rsp), %xmm1, %xmm1
+; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
+; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
+; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
 ; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
-; AVX-NEXT:  vaddsd  16(%rsp), %xmm0, %xmm0
 
   %x0 = call double @bar()
   %x1 = call double @bar()
@@ -656,9 +656,10 @@
 ; AVX-NEXT:  callq   bar
 ; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
 ; AVX-NEXT:  callq   bar
+; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
+; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
 ; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
-; AVX-NEXT:  vaddsd  8(%rsp), %xmm0, %xmm0
-; AVX-NEXT:  vaddsd  16(%rsp), %xmm0, %xmm0
+; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
 
   %x0 = call double @bar()
   %x1 = call double @bar()
Index: lib/CodeGen/MachineCombiner.cpp
===================================================================
--- lib/CodeGen/MachineCombiner.cpp
+++ lib/CodeGen/MachineCombiner.cpp
@@ -242,20 +242,19 @@
   // Get depth, latency and slack of Root.
   unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth;
   unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
-  unsigned RootSlack = BlockTrace.getInstrSlack(Root);
 
   DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n";
         dbgs() << " NewRootDepth: " << NewRootDepth
                << " NewRootLatency: " << NewRootLatency << "\n";
         dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency
-               << " RootSlack: " << RootSlack << "\n";
+               << "\n";
         dbgs() << " NewRootDepth + NewRootLatency = "
                << NewRootDepth + NewRootLatency << "\n";
-        dbgs() << " RootDepth + RootLatency + RootSlack = "
-               << RootDepth + RootLatency + RootSlack << "\n";);
+        dbgs() << " RootDepth + RootLatency = "
+               << RootDepth + RootLatency << "\n";);
 
   unsigned NewCycleCount = NewRootDepth + NewRootLatency;
-  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
+  unsigned OldCycleCount = RootDepth + RootLatency;
   
   if (NewCodeHasLessInsts)
     return NewCycleCount <= OldCycleCount;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D13417.36447.patch
Type: text/x-patch
Size: 2447 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151003/db714976/attachment.bin>