[llvm] [llvm-mca][AMDGPU] Retire instructions that have issue carry over correctly (PR #83881)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 11:45:46 PST 2024


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/83881

>From a72081cf4e8b19c8945cb2898fcac546f8faef81 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 4 Mar 2024 09:55:30 -0800
Subject: [PATCH 1/2] [llvm-mca][AMDGPU] Retire instructions that have issue
 carry over correctly.

https://github.com/llvm/llvm-project/issues/83775 shows llvm-mca hits
sanitizer error in cycleEnd. There was an instruction that takes multiple cycles
to issue and is finished executing directly after issue. Prior to this
patch, the instruction is retired on the first issue cycle, despite
taking multiple cycles to issue.

To fix this, if an instruction takes multiple cycles to issue and is
done executing after issue, let updateCarriedOver retire the instruction
when it is fully issued.
---
 llvm/lib/MCA/Stages/InOrderIssueStage.cpp     | 14 ++-
 .../test/tools/llvm-mca/AMDGPU/carried-over.s | 87 +++++++++++++++++++
 2 files changed, 98 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-mca/AMDGPU/carried-over.s

diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index 0f1737dc3cbc1f..ce0ddcbc74ffc5 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -257,8 +257,9 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
   }
 
   // If the instruction has a latency of 0, we need to handle
-  // the execution and retirement now.
-  if (IS.isExecuted()) {
+  // the execution and retirement now. If the instruction is issued in multiple
+  // cycles, we cannot retire until it is finished issuing.
+  if (IS.isExecuted() && !ShouldCarryOver) {
     PRF.onInstructionExecuted(&IS);
     LSU.onInstructionExecuted(IR);
     notifyEvent<HWInstructionEvent>(
@@ -299,7 +300,10 @@ void InOrderIssueStage::updateIssuedInst() {
     notifyInstructionExecuted(IR);
     ++NumExecuted;
 
-    retireInstruction(*I);
+    // Allow updateCarriedOver to retire the instruction if the instruction
+    // takes multiple cycles to issue.
+    if (!CarriedOver)
+      retireInstruction(*I);
 
     std::iter_swap(I, E - NumExecuted);
   }
@@ -329,6 +333,10 @@ void InOrderIssueStage::updateCarriedOver() {
   else
     Bandwidth -= CarryOver;
 
+  // updateIssuedInst did not retireInstruction if it was carried over.
+  if (CarriedOver.getInstruction()->isExecuted())
+    retireInstruction(CarriedOver);
+
   CarriedOver = InstRef();
   CarryOver = 0;
 }
diff --git a/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s b/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s
new file mode 100644
index 00000000000000..5812dc8211fc91
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AMDGPU/carried-over.s
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
+
+v_pk_mov_b32 v[0:1], v[2:3], v[4:5]
+v_pk_add_f32 v[0:1], v[0:1], v[0:1]
+v_pk_mul_f32 v[0:1], v[0:1], v[0:1]
+v_add_co_u32 v5, s[0:1], v1, v2
+v_sub_co_u32 v5, s[0:1], v1, v2
+v_add_u32 v5, v1, v2
+v_sub_u32 v5, v1, v2
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      7
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total uOps:        9
+
+# CHECK:      Dispatch Width:    1
+# CHECK-NEXT: uOps Per Cycle:    0.90
+# CHECK-NEXT: IPC:               0.70
+# CHECK-NEXT: Block RThroughput: 9.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     1.00                  U     v_pk_mov_b32 v[0:1], v[2:3], v[4:5]
+# CHECK-NEXT:  1      1     1.00                  U     v_pk_add_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT:  1      1     1.00                  U     v_pk_mul_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT:  2      1     1.00                  U     v_add_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT:  2      1     1.00                  U     v_sub_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT:  1      1     1.00                  U     v_add_u32_e32 v5, v1, v2
+# CHECK-NEXT:  1      1     1.00                  U     v_sub_u32_e32 v5, v1, v2
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - HWBranch
+# CHECK-NEXT: [1]   - HWExport
+# CHECK-NEXT: [2]   - HWLGKM
+# CHECK-NEXT: [3]   - HWSALU
+# CHECK-NEXT: [4]   - HWVALU
+# CHECK-NEXT: [5]   - HWVMEM
+# CHECK-NEXT: [6]   - HWXDL
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]
+# CHECK-NEXT:  -      -      -     2.00   7.00    -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     v_pk_mov_b32 v[0:1], v[2:3], v[4:5]
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     v_pk_add_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     v_pk_mul_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT:  -      -      -     1.00   1.00    -      -     v_add_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT:  -      -      -     1.00   1.00    -      -     v_sub_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     v_add_u32_e32 v5, v1, v2
+# CHECK-NEXT:  -      -      -      -     1.00    -      -     v_sub_u32_e32 v5, v1, v2
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DE   .   .   v_pk_mov_b32 v[0:1], v[2:3], v[4:5]
+# CHECK-NEXT: [0,1]     .DE  .   .   v_pk_add_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: [0,2]     . DE .   .   v_pk_mul_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: [0,3]     .  DE.   .   v_add_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT: [0,4]     .   DER  .   v_sub_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT: [0,5]     .    . DE.   v_add_u32_e32 v5, v1, v2
+# CHECK-NEXT: [0,6]     .    .  DE   v_sub_u32_e32 v5, v1, v2
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     0.0    0.0    0.0       v_pk_mov_b32 v[0:1], v[2:3], v[4:5]
+# CHECK-NEXT: 1.     1     0.0    0.0    0.0       v_pk_add_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: 2.     1     0.0    0.0    0.0       v_pk_mul_f32 v[0:1], v[0:1], v[0:1]
+# CHECK-NEXT: 3.     1     0.0    0.0    0.0       v_add_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT: 4.     1     0.0    0.0    0.0       v_sub_co_u32_e64 v5, s[0:1], v1, v2
+# CHECK-NEXT: 5.     1     0.0    0.0    0.0       v_add_u32_e32 v5, v1, v2
+# CHECK-NEXT: 6.     1     0.0    0.0    0.0       v_sub_u32_e32 v5, v1, v2
+# CHECK-NEXT:        1     0.0    0.0    0.0       <total>

>From fdd62e7398fc2262d6006dd1ccdca31b4e485462 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 4 Mar 2024 11:42:43 -0800
Subject: [PATCH 2/2] fixup! handle instruction executed in updateCarriedOver

---
 llvm/lib/MCA/Stages/InOrderIssueStage.cpp | 29 +++++++++++++++--------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index ce0ddcbc74ffc5..ef4c572980268c 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -258,7 +258,8 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
 
   // If the instruction has a latency of 0, we need to handle
   // the execution and retirement now. If the instruction is issued in multiple
-  // cycles, we cannot retire until it is finished issuing.
+  // cycles, we cannot handle the instruction being executed here so we make
+  // updateCarriedOver responsible.
   if (IS.isExecuted() && !ShouldCarryOver) {
     PRF.onInstructionExecuted(&IS);
     LSU.onInstructionExecuted(IR);
@@ -295,15 +296,16 @@ void InOrderIssueStage::updateIssuedInst() {
       continue;
     }
 
-    PRF.onInstructionExecuted(&IS);
-    LSU.onInstructionExecuted(IR);
-    notifyInstructionExecuted(IR);
-    ++NumExecuted;
+    // Allow updateCarriedOver to handle the instruction being executed if the
+    // instruction takes multiple cycles to issue.
+    if (!CarriedOver) {
+      PRF.onInstructionExecuted(&IS);
+      LSU.onInstructionExecuted(IR);
+      notifyInstructionExecuted(IR);
+      ++NumExecuted;
 
-    // Allow updateCarriedOver to retire the instruction if the instruction
-    // takes multiple cycles to issue.
-    if (!CarriedOver)
       retireInstruction(*I);
+    }
 
     std::iter_swap(I, E - NumExecuted);
   }
@@ -333,9 +335,16 @@ void InOrderIssueStage::updateCarriedOver() {
   else
     Bandwidth -= CarryOver;
 
-  // updateIssuedInst did not retireInstruction if it was carried over.
-  if (CarriedOver.getInstruction()->isExecuted())
+  // updateIssuedInst did not handle executed if issue had carry over.
+  if (CarriedOver.getInstruction()->isExecuted()) {
+    PRF.onInstructionExecuted(&IS);
+    LSU.onInstructionExecuted(IR);
+    notifyEvent<HWInstructionEvent>(
+        HWInstructionEvent(HWInstructionEvent::Executed, IR));
+    LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
+
     retireInstruction(CarriedOver);
+  }
 
   CarriedOver = InstRef();
   CarryOver = 0;



More information about the llvm-commits mailing list