[llvm] 70b37f4 - [MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 16 06:54:10 PDT 2021


Author: Andrea Di Biagio
Date: 2021-06-16T14:51:12+01:00
New Revision: 70b37f4c03cd189c94167dc22d9f5303c8773092

URL: https://github.com/llvm/llvm-project/commit/70b37f4c03cd189c94167dc22d9f5303c8773092
DIFF: https://github.com/llvm/llvm-project/commit/70b37f4c03cd189c94167dc22d9f5303c8773092.diff

LOG: [MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).

When instructions are issued to the underlying pipeline resources, the
mca::ResourceManager should also check for implicit uses of resource units,
i.e. extra uses induced by the explicit consumption of multiple partially
overlapping resource groups.

Fixes PR50725

Added: 
    llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s

Modified: 
    llvm/include/llvm/MCA/Instruction.h
    llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
    llvm/lib/MCA/InstrBuilder.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index f34f31ddba57c..a1a1632e3d514 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -359,6 +359,9 @@ struct InstrDesc {
   // A bitmask of used processor resource units.
   uint64_t UsedProcResUnits;
 
+  // A bitmask of implicit uses of processor resource units.
+  uint64_t ImplicitlyUsedProcResUnits;
+
   // A bitmask of used processor resource groups.
   uint64_t UsedProcResGroups;
 

diff  --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 30c4f14d13ae1..3687a24279c2e 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -114,8 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
       Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
       ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
       ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
-      ProcResUnitMask(0), ReservedResourceGroups(0),
-      AvailableBuffers(~0ULL), ReservedBuffers(0) {
+      ProcResUnitMask(0), ReservedResourceGroups(0), AvailableBuffers(~0ULL),
+      ReservedBuffers(0) {
   computeProcResourceMasks(SM, ProcResID2Mask);
 
   // initialize vector ResIndex2ProcResID.
@@ -288,6 +288,15 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
       BusyResourceMask |= E.first;
   }
 
+  uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
+  while (ImplicitUses) {
+    uint64_t Use = ImplicitUses & -ImplicitUses;
+    ImplicitUses ^= Use;
+    unsigned Index = getResourceStateIndex(Use);
+    if (!Resources[Index]->isReady(/* NumUnits */ 1))
+      BusyResourceMask |= Index;
+  }
+
   BusyResourceMask &= ProcResUnitMask;
   if (BusyResourceMask)
     return BusyResourceMask;

diff  --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index fa11beb711ef9..1532fd6e6692b 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -43,7 +43,7 @@ static void initializeUsedResources(InstrDesc &ID,
 
   // Populate resources consumed.
   using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
-  std::vector<ResourcePlusCycles> Worklist;
+  SmallVector<ResourcePlusCycles, 4> Worklist;
 
   // Track cycles contributed by resources that are in a "Super" relationship.
   // This is required if we want to correctly match the behavior of method
@@ -109,6 +109,11 @@ static void initializeUsedResources(InstrDesc &ID,
 
   uint64_t UsedResourceUnits = 0;
   uint64_t UsedResourceGroups = 0;
+  auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
+    return countPopulation(Elt.first) > 1;
+  });
+  unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
+  uint64_t ImpliedUsesOfResourceUnits = 0;
 
   // Remove cycles contributed by smaller resources.
   for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
@@ -127,6 +132,15 @@ static void initializeUsedResources(InstrDesc &ID,
       // Remove the leading 1 from the resource group mask.
       NormalizedMask ^= PowerOf2Floor(NormalizedMask);
       UsedResourceGroups |= (A.first ^ NormalizedMask);
+
+      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
+      if ((NormalizedMask != AvailableMask) &&
+          countPopulation(AvailableMask) == 1) {
+        // At simulation time, this resource group use will decay into a simple
+        // use of the resource unit identified by `AvailableMask`.
+        ImpliedUsesOfResourceUnits |= AvailableMask;
+        UsedResourceUnits |= AvailableMask;
+      }
     }
 
     for (unsigned J = I + 1; J < E; ++J) {
@@ -139,6 +153,31 @@ static void initializeUsedResources(InstrDesc &ID,
     }
   }
 
+  // Look for implicit uses of processor resource units. These are resource
+  // units which are indirectly consumed by resource groups, and that must be
+  // always available on instruction issue.
+  while (ImpliedUsesOfResourceUnits) {
+    ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
+    ImpliedUsesOfResourceUnits = 0;
+    for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
+      ResourcePlusCycles &A = Worklist[I];
+      if (!A.second.size())
+        continue;
+
+      uint64_t NormalizedMask = A.first;
+      assert(countPopulation(NormalizedMask) > 1);
+      // Remove the leading 1 from the resource group mask.
+      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
+      if ((NormalizedMask != AvailableMask) &&
+          countPopulation(AvailableMask) != 1)
+        continue;
+
+      UsedResourceUnits |= AvailableMask;
+      ImpliedUsesOfResourceUnits |= AvailableMask;
+    }
+  }
+
   // A SchedWrite may specify a number of cycles in which a resource group
   // is reserved. For example (on target x86; cpu Haswell):
   //
@@ -198,6 +237,8 @@ static void initializeUsedResources(InstrDesc &ID,
       BufferIDs ^= Current;
     }
     dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+    dbgs() << "\t\tImplicitly Used Units="
+           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
     dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
            << '\n';
   });

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
new file mode 100644
index 0000000000000..c236c629cecf5
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
@@ -0,0 +1,19 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -all-views=false -summary-view -iterations=1 < %s | FileCheck %s
+
+# Do not crash when running this simulation.
+# It is not safe to issue FXRSTOR if SKLPort1 is not available.
+
+bswap %eax
+bswap %eax
+fxrstor 64(%rsp)
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      3
+# CHECK-NEXT: Total Cycles:      68
+# CHECK-NEXT: Total uOps:        92
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    1.35
+# CHECK-NEXT: IPC:               0.04
+# CHECK-NEXT: Block RThroughput: 16.5


        


More information about the llvm-commits mailing list