[llvm] 6784202 - [MCA][ResourceManager] Fix a bug in the instruction issue logic. (#108386)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 01:48:45 PDT 2024


Author: Andrea Di Biagio
Date: 2024-09-16T09:48:42+01:00
New Revision: 6784202b6bb0930d2e4bf231e915c61e446b0585

URL: https://github.com/llvm/llvm-project/commit/6784202b6bb0930d2e4bf231e915c61e446b0585
DIFF: https://github.com/llvm/llvm-project/commit/6784202b6bb0930d2e4bf231e915c61e446b0585.diff

LOG: [MCA][ResourceManager] Fix a bug in the instruction issue logic. (#108386)

Before this patch, the pipeline selection logic in
ResourceManager::issueInstruction() didn't know how to correctly handle
instructions which consume multiple partially overlapping resource
groups. In some cases (like the test case from #108157), the inability
to correctly allocate resources on instruction issue was leading to
crashes.

The presence of multiple partially overlapping groups complicates the
selection process by introducing extra constraints. For those cases, the
issue logic now prioritizes groups which are more constrained than
others.

Fixes #108157

Added: 
    llvm/test/tools/llvm-mca/X86/Znver4/partially-overlapping-group-resources.s

Modified: 
    llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
    llvm/lib/MCA/HardwareUnits/ResourceManager.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
index 0e3f16d2a490b5..f71928f835c0e9 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h
@@ -232,6 +232,8 @@ class ResourceState {
   /// `NumUnits` available units.
   bool isReady(unsigned NumUnits = 1) const;
 
+  uint64_t getNumReadyUnits() const { return llvm::popcount(ReadyMask); }
+
   bool isAResourceGroup() const { return IsAGroup; }
 
   bool containsResource(uint64_t ID) const { return ResourceMask & ID; }
@@ -428,9 +430,32 @@ class ResourceManager {
   uint64_t getProcResUnitMask() const { return ProcResUnitMask; }
   uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; }
 
-  void issueInstruction(
-      const InstrDesc &Desc,
-      SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &Pipes);
+  using ResourceWithCycles = std::pair<ResourceRef, ReleaseAtCycles>;
+
+  void issueInstruction(const InstrDesc &Desc,
+                        SmallVectorImpl<ResourceWithCycles> &Pipes) {
+    if (Desc.HasPartiallyOverlappingGroups)
+      return issueInstructionImpl(Desc, Pipes);
+
+    return fastIssueInstruction(Desc, Pipes);
+  }
+
+  // Selects pipeline resources consumed by an instruction.
+  // This method works under the assumption that used group resources don't
+  // partially overlap. The logic is guaranteed to find a valid resource unit
+  // schedule, no matter in which order individual uses are processed. For that
+  // reason, the vector of resource uses is simply (and quickly) processed in
+  // sequence. The resulting schedule is eventually stored into vector `Pipes`.
+  void fastIssueInstruction(const InstrDesc &Desc,
+                            SmallVectorImpl<ResourceWithCycles> &Pipes);
+
+  // Selects pipeline resources consumed by an instruction.
+  // This method works under the assumption that used resource groups may
+  // partially overlap. This complicates the selection process, because the
+  // order in which uses are processed matters. The logic internally prioritizes
+  // groups which are more constrained than others.
+  void issueInstructionImpl(const InstrDesc &Desc,
+                            SmallVectorImpl<ResourceWithCycles> &Pipes);
 
   void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed);
 

diff  --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 8d99695f4c29e5..82030207eee616 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -344,9 +344,90 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
   return BusyResourceMask;
 }
 
-void ResourceManager::issueInstruction(
-    const InstrDesc &Desc,
-    SmallVectorImpl<std::pair<ResourceRef, ReleaseAtCycles>> &Pipes) {
+void ResourceManager::issueInstructionImpl(
+    const InstrDesc &Desc, SmallVectorImpl<ResourceWithCycles> &Pipes) {
+
+  // Step 1.
+  // - Issue writes to non-group resources.
+  // - Issue writes to groups with only a single resource unit available.
+  // - Update reserved groups (if any)
+  // - Add any remaining resource usage requests to a Worklist.
+  SmallVector<std::pair<uint64_t, ResourceUsage>, 4> Worklist;
+
+  using ResourceWithUsage = std::pair<uint64_t, ResourceUsage>;
+
+  for (const ResourceWithUsage &R : Desc.Resources) {
+    const CycleSegment &CS = R.second.CS;
+    if (!CS.size()) {
+      releaseResource(R.first);
+      continue;
+    }
+
+    assert(CS.begin() == 0 && "Invalid {Start, End} cycles!");
+    if (R.second.isReserved()) {
+      assert((llvm::popcount(R.first) > 1) && "Expected a group!");
+      // Mark this group as reserved.
+      assert(R.second.isReserved());
+      reserveResource(R.first);
+      BusyResources[ResourceRef(R.first, R.first)] += CS.size();
+      continue;
+    }
+
+    const ResourceState &RS = *Resources[getResourceStateIndex(R.first)];
+    if (RS.isAResourceGroup() && RS.getNumReadyUnits() > 1) {
+      Worklist.push_back(R);
+      continue;
+    }
+
+    ResourceRef Pipe = selectPipe(R.first);
+    use(Pipe);
+    BusyResources[Pipe] += CS.size();
+    Pipes.emplace_back(std::make_pair(Pipe, ReleaseAtCycles(CS.size())));
+  }
+
+  // Step 2.
+  // Prioritize writes to groups with less available resources.
+  // NOTE: this algorithm has quadratic complexity in the worst case scenario.
+  // On average, this algorithm is expected to perform quite well and always
+  // converge in very few iterations. That is mainly because instructions rarely
+  // consume more than two or three resource groups.
+
+  while (!Worklist.empty()) {
+    sort(Worklist, [&](const ResourceWithUsage &Lhs,
+                       const ResourceWithUsage &Rhs) {
+      const ResourceState &LhsRS = *Resources[getResourceStateIndex(Lhs.first)];
+      const ResourceState &RhsRS = *Resources[getResourceStateIndex(Rhs.first)];
+      uint64_t LhsReadyUnits = LhsRS.getNumReadyUnits();
+      uint64_t RhsReadyUnits = RhsRS.getNumReadyUnits();
+      if (LhsReadyUnits == RhsReadyUnits)
+        return Lhs.first < Rhs.first;
+      return LhsReadyUnits < RhsReadyUnits;
+    });
+
+    SmallVector<ResourceWithUsage, 4> NewWorklist;
+
+    for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
+      const auto &Elt = Worklist[I];
+      const ResourceState &RS = *Resources[getResourceStateIndex(Elt.first)];
+
+      if (I == 0 || RS.getNumReadyUnits() == 1) {
+        ResourceRef Pipe = selectPipe(Elt.first);
+        use(Pipe);
+        const CycleSegment &CS = Elt.second.CS;
+        BusyResources[Pipe] += CS.size();
+        Pipes.emplace_back(std::make_pair(Pipe, ReleaseAtCycles(CS.size())));
+        continue;
+      }
+
+      NewWorklist.push_back(Elt);
+    }
+
+    swap(NewWorklist, Worklist);
+  };
+}
+
+void ResourceManager::fastIssueInstruction(
+    const InstrDesc &Desc, SmallVectorImpl<ResourceWithCycles> &Pipes) {
   for (const std::pair<uint64_t, ResourceUsage> &R : Desc.Resources) {
     const CycleSegment &CS = R.second.CS;
     if (!CS.size()) {

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver4/partially-overlapping-group-resources.s b/llvm/test/tools/llvm-mca/X86/Znver4/partially-overlapping-group-resources.s
new file mode 100644
index 00000000000000..7705c900ddff12
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/partially-overlapping-group-resources.s
@@ -0,0 +1,91 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver4 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
+
+# This is a test for issue #108157.
+#
+# llvm-mca was crashing when analyzing the following code snippet. This was due to a bug in the
+# instruction issue logic (from class ResourceManager) which was affecting instructions declaring
+# consumption of partially overlapping resources.
+# This test makes sure that analysis is successful, and that no crash occurs.
+
+.intel_syntax
+vpconflictd zmm0, zmm3
+kxnorw k1, k1, k1
+vpxord zmm1, zmm1, zmm1
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      300
+# CHECK-NEXT: Total Cycles:      209
+# CHECK-NEXT: Total uOps:        600
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    2.87
+# CHECK-NEXT: IPC:               1.44
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  4      6     1.50                        vpconflictd	zmm0, zmm3
+# CHECK-NEXT:  1      1     0.50                        kxnorw	k1, k1, k1
+# CHECK-NEXT:  1      0     0.17                        vpxord	zmm1, zmm1, zmm1
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - Zn4AGU0
+# CHECK-NEXT: [1]   - Zn4AGU1
+# CHECK-NEXT: [2]   - Zn4AGU2
+# CHECK-NEXT: [3]   - Zn4ALU0
+# CHECK-NEXT: [4]   - Zn4ALU1
+# CHECK-NEXT: [5]   - Zn4ALU2
+# CHECK-NEXT: [6]   - Zn4ALU3
+# CHECK-NEXT: [7]   - Zn4BRU1
+# CHECK-NEXT: [8]   - Zn4FP0
+# CHECK-NEXT: [9]   - Zn4FP1
+# CHECK-NEXT: [10]  - Zn4FP2
+# CHECK-NEXT: [11]  - Zn4FP3
+# CHECK-NEXT: [12.0] - Zn4FP45
+# CHECK-NEXT: [12.1] - Zn4FP45
+# CHECK-NEXT: [13]  - Zn4FPSt
+# CHECK-NEXT: [14.0] - Zn4LSU
+# CHECK-NEXT: [14.1] - Zn4LSU
+# CHECK-NEXT: [14.2] - Zn4LSU
+# CHECK-NEXT: [15.0] - Zn4Load
+# CHECK-NEXT: [15.1] - Zn4Load
+# CHECK-NEXT: [15.2] - Zn4Load
+# CHECK-NEXT: [16.0] - Zn4Store
+# CHECK-NEXT: [16.1] - Zn4Store
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.76   1.76   1.73   1.75    -      -      -      -      -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.76   1.76   0.74   1.74    -      -      -      -      -      -      -      -      -      -      -     vpconflictd	zmm0, zmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.99   0.01    -      -      -      -      -      -      -      -      -      -      -     kxnorw	k1, k1, k1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     vpxord	zmm1, zmm1, zmm1
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345678
+
+# CHECK:      [0,0]     DeeeeeeER   vpconflictd	zmm0, zmm3
+# CHECK-NEXT: [0,1]     D==eE---R   kxnorw	k1, k1, k1
+# CHECK-NEXT: [0,2]     D-------R   vpxord	zmm1, zmm1, zmm1
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       vpconflictd	zmm0, zmm3
+# CHECK-NEXT: 1.     1     3.0    3.0    3.0       kxnorw	k1, k1, k1
+# CHECK-NEXT: 2.     1     0.0    0.0    7.0       vpxord	zmm1, zmm1, zmm1
+# CHECK-NEXT:        1     1.3    1.3    3.3       <total>


        


More information about the llvm-commits mailing list