[llvm] d84b320 - [MacroFusion] Limit the max fused number as 2 to reduce the dependency

QingShan Zhang via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 3 21:09:37 PST 2019


Author: QingShan Zhang
Date: 2019-12-04T05:05:35Z
New Revision: d84b320dfd0a7dbedacc287ede5e5bc4c0f113ba

URL: https://github.com/llvm/llvm-project/commit/d84b320dfd0a7dbedacc287ede5e5bc4c0f113ba
DIFF: https://github.com/llvm/llvm-project/commit/d84b320dfd0a7dbedacc287ede5e5bc4c0f113ba.diff

LOG: [MacroFusion] Limit the max fused number as 2 to reduce the dependency

This is the example:

int foo(int a, int b, int c, int d) {
  return a + b + c + d;
}

And this is the Dependency Graph:
+------+       +------+       +------+       +------+
|  A   |       |  B   |       |  C   |       |  D   |
+--+--++       +---+--+       +--+---+       +--+---+
   ^  ^            ^  ^          ^              ^
   |  |            |  |          |              |
   |  |            |  |New1      +--------------+
   |  |            |  |          |
   |  |            |  |       +--+---+
   |  |New2        |  +-------+ ADD1 |
   |  |            |          +--+---+
   |  |            |    Fuse     ^
   |  |            +-------------+
   |  +------------+
   |               |
   |   Fuse     +--+---+
   +----------->+ ADD2 |
   |            +------+
+--+---+
| ADD3 |
+------+

We would also need to create an artificial edge from ADD1 to A if
https://reviews.llvm.org/D69998 lands. That would force node A to be scheduled
before ADD1 and ADD2. But in fact it is fine to schedule node A in between
ADD3 and ADD2, because ADD3 and ADD2 are NOT a fusion pair: ADD2 has already
been matched to ADD1. We would be creating unnecessary dependency edges that
override the heuristics.

Differential Revision: https://reviews.llvm.org/D70066

Added: 
    

Modified: 
    llvm/lib/CodeGen/MacroFusion.cpp
    llvm/test/CodeGen/AArch64/macro-fusion.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index d21eae222af0..26cbc14166be 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) {
   return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
 }
 
+static SUnit *getPredClusterSU(const SUnit &SU) {
+  for (const SDep &SI : SU.Preds)
+    if (SI.isCluster())
+      return SI.getSUnit();
+
+  return nullptr;
+}
+
+static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+  unsigned Num = 1;
+  const SUnit *CurrentSU = &SU;
+  while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;
+  return Num < FuseLimit;
+}
+
 static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
                                 SUnit &SecondSU) {
   // Check that neither instr is already paired with another along the edge
@@ -161,8 +176,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
     if (DepSU.isBoundaryNode())
       continue;
 
+    // Only chain two instructions together at most.
     const MachineInstr *DepMI = DepSU.getInstr();
-    if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+    if (!hasLessThanNumFused(DepSU, 2) ||
+        !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
       continue;
 
     if (fuseInstructionPair(DAG, DepSU, AnchorSU))

diff  --git a/llvm/test/CodeGen/AArch64/macro-fusion.ll b/llvm/test/CodeGen/AArch64/macro-fusion.ll
index 97bca14df579..b9a263fe2e23 100644
--- a/llvm/test/CodeGen/AArch64/macro-fusion.ll
+++ b/llvm/test/CodeGen/AArch64/macro-fusion.ll
@@ -1,21 +1,18 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+fuse-arith-logic -verify-misched -debug-only=machine-scheduler 2>&1 > /dev/null | FileCheck %s
 
-; Verify that, the macro-fusion creates the necessary dependencies between SUs.
+; Verify that, the macro-fusion creates the necessary dependencies between SUs and
+; only 2 SU's are fused at most.
 define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
 entry:
 ; CHECK: ********** MI Scheduling **********
 ; CHECK-LABEL: %bb.0 entry 
 ; CHECK: Macro fuse: SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
 ; CHECK: Bind SU([[SU1:[0-9]+]]) - SU([[SU4]])
-; CHECK: Macro fuse: SU([[SU5]]) - SU([[SU6:[0-9]+]])
-; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU5]])
-; CHECK: SU([[SU0]]):   %{{[0-9]+}}:gpr32 = COPY $w3
+; CHECK-NOT: Macro fuse:
 ; CHECK: SU([[SU1]]):   %{{[0-9]+}}:gpr32 = COPY $w2
 ; CHECK: SU([[SU4]]):   %{{[0-9]+}}:gpr32 = nsw ADDWrr
 ; CHECK: SU([[SU5]]):   %{{[0-9]+}}:gpr32 = nsw ADDWrr
-; CHECK: SU([[SU6]]):   %{{[0-9]+}}:gpr32 = nsw SUBWrr
-
   %add = add nsw i32 %b, %a
   %add1 = add nsw i32 %add, %c
   %sub = sub nsw i32 %add1, %d


        


More information about the llvm-commits mailing list