[llvm] 3262794 - [MCA] Correctly check pipeline availability for partially overlapping resource groups.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 7 04:19:58 PDT 2022


Author: Andrea Di Biagio
Date: 2022-09-07T12:17:59+01:00
New Revision: 3262794804ad23ed4a511669ffc97d128512bc37

URL: https://github.com/llvm/llvm-project/commit/3262794804ad23ed4a511669ffc97d128512bc37
DIFF: https://github.com/llvm/llvm-project/commit/3262794804ad23ed4a511669ffc97d128512bc37.diff

LOG: [MCA] Correctly check pipeline availability for partially overlapping resource groups.

This patch mostly reverts commit 70b37f4c03c which fixed PR50725.

In case of explicit consumption of multiple partially overlapping group
resources, the ResourceManager was not correctly checking pipeline
resources availability.

The fix for PR50725 only partially addressed a few instances of that issue.
This is a more general (although, technically slower) fix for that same issue.

It also fixes Issue #57548

Thanks to Haohai Wen for the small reproducible.

Added: 
    llvm/test/tools/llvm-mca/X86/AlderlakeP/partially-overlapping-groups.s

Modified: 
    llvm/include/llvm/MCA/Instruction.h
    llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
    llvm/lib/MCA/InstrBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 29b797cee260..e48a70164bec 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -458,9 +458,6 @@ struct InstrDesc {
   // A bitmask of used processor resource units.
   uint64_t UsedProcResUnits;
 
-  // A bitmask of implicit uses of processor resource units.
-  uint64_t ImplicitlyUsedProcResUnits;
-
   // A bitmask of used processor resource groups.
   uint64_t UsedProcResGroups;
 
@@ -481,6 +478,9 @@ struct InstrDesc {
   // recycled.
   unsigned IsRecyclable : 1;
 
+  // True if some of the consumed group resources are partially overlapping.
+  unsigned HasPartiallyOverlappingGroups : 1;
+
   // A zero latency instruction doesn't consume any scheduler resources.
   bool isZeroLatency() const { return !MaxLatency && Resources.empty(); }
 

diff  --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 3687a24279c2..600fe5b7a187 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -281,26 +281,67 @@ void ResourceManager::releaseBuffers(uint64_t ConsumedBuffers) {
 
 uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
   uint64_t BusyResourceMask = 0;
+  uint64_t ConsumedResourceMask = 0;
+  DenseMap<uint64_t, unsigned> AvailableUnits;
+
   for (const std::pair<uint64_t, ResourceUsage> &E : Desc.Resources) {
     unsigned NumUnits = E.second.isReserved() ? 0U : E.second.NumUnits;
-    unsigned Index = getResourceStateIndex(E.first);
-    if (!Resources[Index]->isReady(NumUnits))
+    const ResourceState &RS = *Resources[getResourceStateIndex(E.first)];
+    if (!RS.isReady(NumUnits)) {
       BusyResourceMask |= E.first;
-  }
+      continue;
+    }
 
-  uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
-  while (ImplicitUses) {
-    uint64_t Use = ImplicitUses & -ImplicitUses;
-    ImplicitUses ^= Use;
-    unsigned Index = getResourceStateIndex(Use);
-    if (!Resources[Index]->isReady(/* NumUnits */ 1))
-      BusyResourceMask |= Index;
+    if (Desc.HasPartiallyOverlappingGroups && !RS.isAResourceGroup()) {
+      unsigned NumAvailableUnits = countPopulation(RS.getReadyMask());
+      NumAvailableUnits -= NumUnits;
+      AvailableUnits[E.first] = NumAvailableUnits;
+      if (!NumAvailableUnits)
+        ConsumedResourceMask |= E.first;
+    }
   }
 
   BusyResourceMask &= ProcResUnitMask;
   if (BusyResourceMask)
     return BusyResourceMask;
-  return Desc.UsedProcResGroups & ReservedResourceGroups;
+
+  BusyResourceMask = Desc.UsedProcResGroups & ReservedResourceGroups;
+  if (!Desc.HasPartiallyOverlappingGroups || BusyResourceMask)
+    return BusyResourceMask;
+
+  // If this instruction has overlapping groups, make sure that we can
+  // select at least one unit per group.
+  for (const std::pair<uint64_t, ResourceUsage> &E : Desc.Resources) {
+    const ResourceState &RS = *Resources[getResourceStateIndex(E.first)];
+    if (!E.second.isReserved() && RS.isAResourceGroup()) {
+      uint64_t ReadyMask = RS.getReadyMask() & ~ConsumedResourceMask;
+      if (!ReadyMask) {
+        BusyResourceMask |= RS.getReadyMask();
+        continue;
+      }
+
+      uint64_t ResourceMask = PowerOf2Floor(ReadyMask);
+
+      auto it = AvailableUnits.find(ResourceMask);
+      if (it == AvailableUnits.end()) {
+        unsigned Index = getResourceStateIndex(ResourceMask);
+        unsigned NumUnits = countPopulation(Resources[Index]->getReadyMask());
+        it =
+            AvailableUnits.insert(std::make_pair(ResourceMask, NumUnits)).first;
+      }
+
+      if (!it->second) {
+        BusyResourceMask |= it->first;
+        continue;
+      }
+
+      it->second--;
+      if (!it->second)
+        ConsumedResourceMask |= it->first;
+    }
+  }
+
+  return BusyResourceMask;
 }
 
 void ResourceManager::issueInstruction(

diff  --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 45acea253587..71c565236e88 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -112,13 +112,12 @@ static void initializeUsedResources(InstrDesc &ID,
 
   uint64_t UsedResourceUnits = 0;
   uint64_t UsedResourceGroups = 0;
-  auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
-    return countPopulation(Elt.first) > 1;
-  });
-  unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
-  uint64_t ImpliedUsesOfResourceUnits = 0;
+  uint64_t UnitsFromResourceGroups = 0;
+
+  // Remove cycles contributed by smaller resources, and check if there
+  // are partially overlapping resource groups.
+  ID.HasPartiallyOverlappingGroups = false;
 
-  // Remove cycles contributed by smaller resources.
   for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
     ResourcePlusCycles &A = Worklist[I];
     if (!A.second.size()) {
@@ -129,21 +128,17 @@ static void initializeUsedResources(InstrDesc &ID,
 
     ID.Resources.emplace_back(A);
     uint64_t NormalizedMask = A.first;
+
     if (countPopulation(A.first) == 1) {
       UsedResourceUnits |= A.first;
     } else {
       // Remove the leading 1 from the resource group mask.
       NormalizedMask ^= PowerOf2Floor(NormalizedMask);
-      UsedResourceGroups |= (A.first ^ NormalizedMask);
+      if (UnitsFromResourceGroups & NormalizedMask)
+        ID.HasPartiallyOverlappingGroups = true;
 
-      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
-      if ((NormalizedMask != AvailableMask) &&
-          countPopulation(AvailableMask) == 1) {
-        // At simulation time, this resource group use will decay into a simple
-        // use of the resource unit identified by `AvailableMask`.
-        ImpliedUsesOfResourceUnits |= AvailableMask;
-        UsedResourceUnits |= AvailableMask;
-      }
+      UnitsFromResourceGroups |= NormalizedMask;
+      UsedResourceGroups |= (A.first ^ NormalizedMask);
     }
 
     for (unsigned J = I + 1; J < E; ++J) {
@@ -156,31 +151,6 @@ static void initializeUsedResources(InstrDesc &ID,
     }
   }
 
-  // Look for implicit uses of processor resource units. These are resource
-  // units which are indirectly consumed by resource groups, and that must be
-  // always available on instruction issue.
-  while (ImpliedUsesOfResourceUnits) {
-    ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
-    ImpliedUsesOfResourceUnits = 0;
-    for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
-      ResourcePlusCycles &A = Worklist[I];
-      if (!A.second.size())
-        continue;
-
-      uint64_t NormalizedMask = A.first;
-      assert(countPopulation(NormalizedMask) > 1);
-      // Remove the leading 1 from the resource group mask.
-      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
-      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
-      if ((NormalizedMask != AvailableMask) &&
-          countPopulation(AvailableMask) != 1)
-        continue;
-
-      UsedResourceUnits |= AvailableMask;
-      ImpliedUsesOfResourceUnits |= AvailableMask;
-    }
-  }
-
   // A SchedWrite may specify a number of cycles in which a resource group
   // is reserved. For example (on target x86; cpu Haswell):
   //
@@ -240,10 +210,10 @@ static void initializeUsedResources(InstrDesc &ID,
       BufferIDs ^= Current;
     }
     dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
-    dbgs() << "\t\tImplicitly Used Units="
-           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
     dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
            << '\n';
+    dbgs() << "\t\tHasPartiallyOverlappingGroups="
+           << ID.HasPartiallyOverlappingGroups << '\n';
   });
 }
 

diff  --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/partially-overlapping-groups.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/partially-overlapping-groups.s
new file mode 100644
index 000000000000..6229ddb5909e
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/partially-overlapping-groups.s
@@ -0,0 +1,21 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -all-views=false -summary-view < %s | FileCheck %s
+
+# Issue #57548
+
+# Do not crash when simulating instructions that consume partially overlapping
+# resource groups.
+
+vpsllw %xmm1, %ymm0, %ymm0
+vpsllw %xmm1, %xmm2, %xmm1
+vpand %ymm1, %ymm0, %ymm0
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      300
+# CHECK-NEXT: Total Cycles:      503
+# CHECK-NEXT: Total uOps:        500
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.60
+# CHECK-NEXT: Block RThroughput: 1.0


        


More information about the llvm-commits mailing list