[llvm] e6ce0db - [MCA] Ensure that writes occur in-order

Andrew Savonichev via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 18 07:11:48 PDT 2021


Author: Andrew Savonichev
Date: 2021-03-18T17:10:20+03:00
New Revision: e6ce0db378473c1d264152f370af719903b98bf8

URL: https://github.com/llvm/llvm-project/commit/e6ce0db378473c1d264152f370af719903b98bf8
DIFF: https://github.com/llvm/llvm-project/commit/e6ce0db378473c1d264152f370af719903b98bf8.diff

LOG: [MCA] Ensure that writes occur in-order

Delay the issue of a new instruction if issuing it immediately would lead
to out-of-order commits of writes.

This patch fixes the problem described in:
https://bugs.llvm.org/show_bug.cgi?id=41796#c3

Differential Revision: https://reviews.llvm.org/D98604

Added: 
    

Modified: 
    llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
    llvm/lib/MCA/Stages/InOrderIssueStage.cpp
    llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
    llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
    llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
index 0b4ea99d06db..867a6c1df3c5 100644
--- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -50,6 +50,11 @@ class InOrderIssueStage final : public Stage {
   /// Number of instructions that can be issued in the current cycle.
   unsigned Bandwidth;
 
+  /// Number of cycles (counted from the current cycle) until the last write is
+  /// committed. This is taken into account to ensure that writes commit in the
+  /// program order.
+  unsigned LastWriteBackCycle;
+
   InOrderIssueStage(const InOrderIssueStage &Other) = delete;
   InOrderIssueStage &operator=(const InOrderIssueStage &Other) = delete;
 
@@ -69,7 +74,7 @@ class InOrderIssueStage final : public Stage {
                     const MCSchedModel &SM, const MCSubtargetInfo &STI)
       : SM(SM), STI(STI), RCU(RCU), PRF(PRF),
         RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
-        StallCyclesLeft(0), Bandwidth(0) {}
+        StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
 
   bool isAvailable(const InstRef &) const override;
   bool hasWorkToComplete() const override;

diff  --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index a675b92e1068..dd2270d3a8f3 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -57,6 +57,32 @@ static bool hasResourceHazard(const ResourceManager &RM, const InstRef &IR) {
   return false;
 }
 
+static unsigned findLastWriteBackCycle(const InstRef &IR) { // max cycles-left over all defs of IR; 0 if none
+  unsigned LastWBCycle = 0;
+  for (const WriteState &WS : IR.getInstruction()->getDefs()) {
+    int CyclesLeft = WS.getCyclesLeft();
+    if (CyclesLeft == UNKNOWN_CYCLES) // cycles-left is unknown: use the write's latency instead
+      CyclesLeft = WS.getLatency();
+    if (CyclesLeft < 0) // clamp negatives so the unsigned cast below cannot wrap
+      CyclesLeft = 0;
+    LastWBCycle = std::max(LastWBCycle, (unsigned)CyclesLeft);
+  }
+  return LastWBCycle;
+}
+
+static unsigned findFirstWriteBackCycle(const InstRef &IR) { // min cycles-left over all defs of IR; ~0U if none
+  unsigned FirstWBCycle = ~0U;
+  for (const WriteState &WS : IR.getInstruction()->getDefs()) {
+    int CyclesLeft = WS.getCyclesLeft();
+    if (CyclesLeft == UNKNOWN_CYCLES) // cycles-left is unknown: use the write's latency instead
+      CyclesLeft = WS.getLatency();
+    if (CyclesLeft < 0) // clamp negatives so the unsigned cast below cannot wrap
+      CyclesLeft = 0;
+    FirstWBCycle = std::min(FirstWBCycle, (unsigned)CyclesLeft);
+  }
+  return FirstWBCycle;
+}
+
 /// Return a number of cycles left until register requirements of the
 /// instructions are met.
 static unsigned checkRegisterHazard(const RegisterFile &PRF,
@@ -116,6 +142,14 @@ bool InOrderIssueStage::canExecute(const InstRef &IR,
         HWStallEvent(HWStallEvent::DispatchGroupStall, IR));
     notifyEvent<HWPressureEvent>(
         HWPressureEvent(HWPressureEvent::RESOURCES, IR));
+  } else if (LastWriteBackCycle) {
+    if (!IR.getInstruction()->getDesc().RetireOOO) {
+      unsigned NextWriteBackCycle = findFirstWriteBackCycle(IR);
+      // Delay the instruction to ensure that writes occur in program order
+      if (NextWriteBackCycle < LastWriteBackCycle) {
+        *StallCycles = LastWriteBackCycle - NextWriteBackCycle;
+      }
+    }
   }
 
   return *StallCycles == 0;
@@ -213,6 +247,9 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
   IssuedInst.push_back(IR);
   ++NumIssued;
 
+  if (!IR.getInstruction()->getDesc().RetireOOO)
+    LastWriteBackCycle = findLastWriteBackCycle(IR);
+
   return llvm::ErrorSuccess();
 }
 
@@ -285,6 +322,10 @@ llvm::Error InOrderIssueStage::cycleStart() {
 llvm::Error InOrderIssueStage::cycleEnd() {
   if (StallCyclesLeft > 0)
     --StallCyclesLeft;
+
+  if (LastWriteBackCycle > 0) // value is relative to the current cycle, so count it down once per cycle
+    --LastWriteBackCycle;
+
   return llvm::ErrorSuccess();
 }
 

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
index 35149b09f66f..a672c8c879ae 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
@@ -35,7 +35,7 @@ str	w0, [x21, x18, lsl #2]
 # CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RAT     - Register unavailable:                      8  (38.1%)
 # CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
 # CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
 # CHECK-NEXT: LQ      - Load queue full:                           0

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
index f6b3f622a38b..1d4e41a63c63 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
@@ -35,7 +35,7 @@ str	w0, [x21, x18, lsl #2]
 # CHECK-NEXT:  1      4     1.00           *            str	w0, [x21, x18, lsl #2]
 
 # CHECK:      Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT     - Register unavailable:                      10  (47.6%)
+# CHECK-NEXT: RAT     - Register unavailable:                      8  (38.1%)
 # CHECK-NEXT: RCU     - Retire tokens unavailable:                 0
 # CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
 # CHECK-NEXT: LQ      - Load queue full:                           0
@@ -106,13 +106,13 @@ str	w0, [x21, x18, lsl #2]
 # CHECK:      [0,0]     DeeER.    .    .    .   ldr	w4, [x2], #4
 # CHECK-NEXT: [0,1]     .DeeER    .    .    .   ldr	w5, [x3]
 # CHECK-NEXT: [0,2]     .   DeeeER.    .    .   madd	w0, w5, w4, w0
-# CHECK-NEXT: [0,3]     .   DeeE-R.    .    .   add	x3, x3, x13
+# CHECK-NEXT: [0,3]     .    DeeER.    .    .   add	x3, x3, x13
 # CHECK-NEXT: [0,4]     .    DeeER.    .    .   subs	x1, x1, #1
 # CHECK-NEXT: [0,5]     .    . DeeeER  .    .   str	w0, [x21, x18, lsl #2]
 # CHECK-NEXT: [1,0]     .    .  DeeER  .    .   ldr	w4, [x2], #4
 # CHECK-NEXT: [1,1]     .    .   DeeER .    .   ldr	w5, [x3]
 # CHECK-NEXT: [1,2]     .    .    . DeeeER  .   madd	w0, w5, w4, w0
-# CHECK-NEXT: [1,3]     .    .    . DeeE-R  .   add	x3, x3, x13
+# CHECK-NEXT: [1,3]     .    .    .  DeeER  .   add	x3, x3, x13
 # CHECK-NEXT: [1,4]     .    .    .  DeeER  .   subs	x1, x1, #1
 # CHECK-NEXT: [1,5]     .    .    .    DeeeER   str	w0, [x21, x18, lsl #2]
 
@@ -126,7 +126,7 @@ str	w0, [x21, x18, lsl #2]
 # CHECK-NEXT: 0.     2     0.0    0.0    0.0       ldr	w4, [x2], #4
 # CHECK-NEXT: 1.     2     0.0    0.0    0.0       ldr	w5, [x3]
 # CHECK-NEXT: 2.     2     0.0    0.0    0.0       madd	w0, w5, w4, w0
-# CHECK-NEXT: 3.     2     0.0    0.0    1.0       add	x3, x3, x13
+# CHECK-NEXT: 3.     2     0.0    0.0    0.0       add	x3, x3, x13
 # CHECK-NEXT: 4.     2     0.0    0.0    0.0       subs	x1, x1, #1
 # CHECK-NEXT: 5.     2     0.0    0.0    0.0       str	w0, [x21, x18, lsl #2]
-# CHECK-NEXT:        2     0.0    0.0    0.2       <total>
+# CHECK-NEXT:        2     0.0    0.0    0.0       <total>

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
index 71c1a0620607..de5dbaa3490c 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
@@ -10,12 +10,12 @@ add	w7, w9, w0
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      12
-# CHECK-NEXT: Total Cycles:      18
+# CHECK-NEXT: Total Cycles:      20
 # CHECK-NEXT: Total uOps:        12
 
 # CHECK:      Dispatch Width:    2
-# CHECK-NEXT: uOps Per Cycle:    0.67
-# CHECK-NEXT: IPC:               0.67
+# CHECK-NEXT: uOps Per Cycle:    0.60
+# CHECK-NEXT: IPC:               0.60
 # CHECK-NEXT: Block RThroughput: 8.0
 
 # CHECK:      Instruction Info:
@@ -40,33 +40,37 @@ add	w7, w9, w0
 # CHECK-NEXT: SCHEDQ  - Scheduler full:                            0
 # CHECK-NEXT: LQ      - Load queue full:                           0
 # CHECK-NEXT: SQ      - Store queue full:                          0
-# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 5  (27.8%)
+# CHECK-NEXT: GROUP   - Static restrictions on the dispatch group: 1  (5.0%)
 
 # CHECK:      Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
 # CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT:  0,              12  (66.7%)
-# CHECK-NEXT:  2,              6  (33.3%)
+# CHECK-NEXT:  0,              12  (60.0%)
+# CHECK-NEXT:  1,              4  (20.0%)
+# CHECK-NEXT:  2,              4  (20.0%)
 
 # CHECK:      Schedulers - number of cycles where we saw N micro opcodes issued:
 # CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT:  0,          12  (66.7%)
-# CHECK-NEXT:  2,          6  (33.3%)
+# CHECK-NEXT:  0,          12  (60.0%)
+# CHECK-NEXT:  1,          4  (20.0%)
+# CHECK-NEXT:  2,          4  (20.0%)
 
 # CHECK:      Scheduler's queue usage:
 # CHECK-NEXT: No scheduler resources used.
 
 # CHECK:      Retire Control Unit - number of cycles where we saw N instructions retired:
 # CHECK-NEXT: [# retired], [# cycles]
-# CHECK-NEXT:  0,           16  (88.9%)
-# CHECK-NEXT:  6,           2  (11.1%)
+# CHECK-NEXT:  0,           14  (70.0%)
+# CHECK-NEXT:  1,           2  (10.0%)
+# CHECK-NEXT:  2,           2  (10.0%)
+# CHECK-NEXT:  3,           2  (10.0%)
 
 # CHECK:      Total ROB Entries:                64
-# CHECK-NEXT: Max Used ROB Entries:             8  ( 12.5% )
-# CHECK-NEXT: Average Used ROB Entries per cy:  5  ( 7.8% )
+# CHECK-NEXT: Max Used ROB Entries:             7  ( 10.9% )
+# CHECK-NEXT: Average Used ROB Entries per cy:  2  ( 3.1% )
 
 # CHECK:      Register File statistics:
 # CHECK-NEXT: Total number of mappings created:    12
-# CHECK-NEXT: Max number of mappings used:         8
+# CHECK-NEXT: Max number of mappings used:         7
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - CortexA55UnitALU
@@ -96,21 +100,21 @@ add	w7, w9, w0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     add	w7, w9, w0
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     01234567
+# CHECK-NEXT:                     0123456789
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeER.    . .   sdiv	w12, w21, w0
-# CHECK-NEXT: [0,1]     DeeE-----R.    . .   add	w8, w8, #1
-# CHECK-NEXT: [0,2]     .DeeE----R.    . .   add	w1, w2, w0
-# CHECK-NEXT: [0,3]     .DeeE----R.    . .   add	w3, w4, #1
-# CHECK-NEXT: [0,4]     . DeeE---R.    . .   add	w5, w6, w0
-# CHECK-NEXT: [0,5]     . DeeE---R.    . .   add	w7, w9, w0
-# CHECK-NEXT: [1,0]     .    .  DeeeeeeeER   sdiv	w12, w21, w0
-# CHECK-NEXT: [1,1]     .    .  DeeE-----R   add	w8, w8, #1
-# CHECK-NEXT: [1,2]     .    .   DeeE----R   add	w1, w2, w0
-# CHECK-NEXT: [1,3]     .    .   DeeE----R   add	w3, w4, #1
-# CHECK-NEXT: [1,4]     .    .    DeeE---R   add	w5, w6, w0
-# CHECK-NEXT: [1,5]     .    .    DeeE---R   add	w7, w9, w0
+# CHECK:      [0,0]     DeeeeeeeER.    .   .   sdiv	w12, w21, w0
+# CHECK-NEXT: [0,1]     .    DeeER.    .   .   add	w8, w8, #1
+# CHECK-NEXT: [0,2]     .    DeeER.    .   .   add	w1, w2, w0
+# CHECK-NEXT: [0,3]     .    .DeeER    .   .   add	w3, w4, #1
+# CHECK-NEXT: [0,4]     .    .DeeER    .   .   add	w5, w6, w0
+# CHECK-NEXT: [0,5]     .    . DeeER   .   .   add	w7, w9, w0
+# CHECK-NEXT: [1,0]     .    .  DeeeeeeeER .   sdiv	w12, w21, w0
+# CHECK-NEXT: [1,1]     .    .    .  DeeER .   add	w8, w8, #1
+# CHECK-NEXT: [1,2]     .    .    .  DeeER .   add	w1, w2, w0
+# CHECK-NEXT: [1,3]     .    .    .   DeeER.   add	w3, w4, #1
+# CHECK-NEXT: [1,4]     .    .    .   DeeER.   add	w5, w6, w0
+# CHECK-NEXT: [1,5]     .    .    .    DeeER   add	w7, w9, w0
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -120,9 +124,9 @@ add	w7, w9, w0
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     2     0.0    0.0    0.0       sdiv	w12, w21, w0
-# CHECK-NEXT: 1.     2     0.0    0.0    5.0       add	w8, w8, #1
-# CHECK-NEXT: 2.     2     0.0    0.0    4.0       add	w1, w2, w0
-# CHECK-NEXT: 3.     2     0.0    0.0    4.0       add	w3, w4, #1
-# CHECK-NEXT: 4.     2     0.0    0.0    3.0       add	w5, w6, w0
-# CHECK-NEXT: 5.     2     0.0    0.0    3.0       add	w7, w9, w0
-# CHECK-NEXT:        2     0.0    0.0    3.2       <total>
+# CHECK-NEXT: 1.     2     0.0    0.0    0.0       add	w8, w8, #1
+# CHECK-NEXT: 2.     2     0.0    0.0    0.0       add	w1, w2, w0
+# CHECK-NEXT: 3.     2     0.0    0.0    0.0       add	w3, w4, #1
+# CHECK-NEXT: 4.     2     0.0    0.0    0.0       add	w5, w6, w0
+# CHECK-NEXT: 5.     2     0.0    0.0    0.0       add	w7, w9, w0
+# CHECK-NEXT:        2     0.0    0.0    0.0       <total>


        


More information about the llvm-commits mailing list