[llvm] 70b37f4 - [MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 16 06:54:10 PDT 2021
Author: Andrea Di Biagio
Date: 2021-06-16T14:51:12+01:00
New Revision: 70b37f4c03cd189c94167dc22d9f5303c8773092
URL: https://github.com/llvm/llvm-project/commit/70b37f4c03cd189c94167dc22d9f5303c8773092
DIFF: https://github.com/llvm/llvm-project/commit/70b37f4c03cd189c94167dc22d9f5303c8773092.diff
LOG: [MCA][InstrBuilder] Always check for implicit uses of resource units (PR50725).
When instructions are issued to the underlying pipeline resources, the
mca::ResourceManager should also check for implicit uses of resource units,
i.e. extra uses induced by the explicit consumption of multiple partially
overlapping group resources.
Fixes PR50725.
Added:
llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
Modified:
llvm/include/llvm/MCA/Instruction.h
llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
llvm/lib/MCA/InstrBuilder.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index f34f31ddba57c..a1a1632e3d514 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -359,6 +359,9 @@ struct InstrDesc {
// A bitmask of used processor resource units.
uint64_t UsedProcResUnits;
+ // A bitmask of implicit uses of processor resource units.
+ uint64_t ImplicitlyUsedProcResUnits;
+
// A bitmask of used processor resource groups.
uint64_t UsedProcResGroups;
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index 30c4f14d13ae1..3687a24279c2e 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -114,8 +114,8 @@ ResourceManager::ResourceManager(const MCSchedModel &SM)
Resource2Groups(SM.getNumProcResourceKinds() - 1, 0),
ProcResID2Mask(SM.getNumProcResourceKinds(), 0),
ResIndex2ProcResID(SM.getNumProcResourceKinds() - 1, 0),
- ProcResUnitMask(0), ReservedResourceGroups(0),
- AvailableBuffers(~0ULL), ReservedBuffers(0) {
+ ProcResUnitMask(0), ReservedResourceGroups(0), AvailableBuffers(~0ULL),
+ ReservedBuffers(0) {
computeProcResourceMasks(SM, ProcResID2Mask);
// initialize vector ResIndex2ProcResID.
@@ -288,6 +288,15 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
BusyResourceMask |= E.first;
}
+ uint64_t ImplicitUses = Desc.ImplicitlyUsedProcResUnits;
+ while (ImplicitUses) {
+ uint64_t Use = ImplicitUses & -ImplicitUses;
+ ImplicitUses ^= Use;
+ unsigned Index = getResourceStateIndex(Use);
+ if (!Resources[Index]->isReady(/* NumUnits */ 1))
+ BusyResourceMask |= Index;
+ }
+
BusyResourceMask &= ProcResUnitMask;
if (BusyResourceMask)
return BusyResourceMask;
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index fa11beb711ef9..1532fd6e6692b 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -43,7 +43,7 @@ static void initializeUsedResources(InstrDesc &ID,
// Populate resources consumed.
using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
- std::vector<ResourcePlusCycles> Worklist;
+ SmallVector<ResourcePlusCycles, 4> Worklist;
// Track cycles contributed by resources that are in a "Super" relationship.
// This is required if we want to correctly match the behavior of method
@@ -109,6 +109,11 @@ static void initializeUsedResources(InstrDesc &ID,
uint64_t UsedResourceUnits = 0;
uint64_t UsedResourceGroups = 0;
+ auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
+ return countPopulation(Elt.first) > 1;
+ });
+ unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
+ uint64_t ImpliedUsesOfResourceUnits = 0;
// Remove cycles contributed by smaller resources.
for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
@@ -127,6 +132,15 @@ static void initializeUsedResources(InstrDesc &ID,
// Remove the leading 1 from the resource group mask.
NormalizedMask ^= PowerOf2Floor(NormalizedMask);
UsedResourceGroups |= (A.first ^ NormalizedMask);
+
+ uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
+ if ((NormalizedMask != AvailableMask) &&
+ countPopulation(AvailableMask) == 1) {
+ // At simulation time, this resource group use will decay into a simple
+ // use of the resource unit identified by `AvailableMask`.
+ ImpliedUsesOfResourceUnits |= AvailableMask;
+ UsedResourceUnits |= AvailableMask;
+ }
}
for (unsigned J = I + 1; J < E; ++J) {
@@ -139,6 +153,31 @@ static void initializeUsedResources(InstrDesc &ID,
}
}
+ // Look for implicit uses of processor resource units. These are resource
+ // units which are indirectly consumed by resource groups, and that must be
+ // always available on instruction issue.
+ while (ImpliedUsesOfResourceUnits) {
+ ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
+ ImpliedUsesOfResourceUnits = 0;
+ for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
+ ResourcePlusCycles &A = Worklist[I];
+ if (!A.second.size())
+ continue;
+
+ uint64_t NormalizedMask = A.first;
+ assert(countPopulation(NormalizedMask) > 1);
+ // Remove the leading 1 from the resource group mask.
+ NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
+ if ((NormalizedMask != AvailableMask) &&
+ countPopulation(AvailableMask) != 1)
+ continue;
+
+ UsedResourceUnits |= AvailableMask;
+ ImpliedUsesOfResourceUnits |= AvailableMask;
+ }
+ }
+
// A SchedWrite may specify a number of cycles in which a resource group
// is reserved. For example (on target x86; cpu Haswell):
//
@@ -198,6 +237,8 @@ static void initializeUsedResources(InstrDesc &ID,
BufferIDs ^= Current;
}
dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
+ dbgs() << "\t\tImplicitly Used Units="
+ << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
<< '\n';
});
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
new file mode 100644
index 0000000000000..c236c629cecf5
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/PR50725.s
@@ -0,0 +1,19 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -all-views=false -summary-view -iterations=1 < %s | FileCheck %s
+
+# Do not crash when running this simulation.
+# It is not safe to issue FXRSTOR if SKLPort1 is not available.
+
+bswap %eax
+bswap %eax
+fxrstor 64(%rsp)
+
+# CHECK: Iterations: 1
+# CHECK-NEXT: Instructions: 3
+# CHECK-NEXT: Total Cycles: 68
+# CHECK-NEXT: Total uOps: 92
+
+# CHECK: Dispatch Width: 6
+# CHECK-NEXT: uOps Per Cycle: 1.35
+# CHECK-NEXT: IPC: 0.04
+# CHECK-NEXT: Block RThroughput: 16.5
More information about the llvm-commits
mailing list