[llvm] 292da93 - [MCA] Disable RCU for InOrderIssueStage
Andrew Savonichev via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 24 03:55:52 PDT 2021
Author: Andrew Savonichev
Date: 2021-03-24T13:54:04+03:00
New Revision: 292da93d59a3688ffc95c10de7986472242e8f1d
URL: https://github.com/llvm/llvm-project/commit/292da93d59a3688ffc95c10de7986472242e8f1d
DIFF: https://github.com/llvm/llvm-project/commit/292da93d59a3688ffc95c10de7986472242e8f1d.diff
LOG: [MCA] Disable RCU for InOrderIssueStage
This is a follow-up to:
D98604 [MCA] Ensure that writes occur in-order
Once instructions are constrained so that their writes occur in order, they
retire in-order naturally. A RetireControlUnit (RCU) is therefore unnecessary
for the in-order pipeline, so it is disabled.
Differential Revision: https://reviews.llvm.org/D98628
Added:
Modified:
llvm/docs/CommandGuide/llvm-mca.rst
llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
llvm/include/llvm/MCA/Stages/RetireStage.h
llvm/lib/MCA/Context.cpp
llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
llvm/lib/MCA/Stages/InOrderIssueStage.cpp
llvm/lib/MCA/Stages/RetireStage.cpp
llvm/lib/Target/AArch64/AArch64SchedA55.td
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s
llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s
llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
llvm/tools/llvm-mca/Views/TimelineView.cpp
llvm/tools/llvm-mca/llvm-mca.cpp
Removed:
################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst
index 1229fd934c80..9e40e5d9e4f5 100644
--- a/llvm/docs/CommandGuide/llvm-mca.rst
+++ b/llvm/docs/CommandGuide/llvm-mca.rst
@@ -975,7 +975,6 @@ met. Multiple instructions can be issued in one cycle according to the value of
the ``IssueWidth`` parameter in LLVM's scheduling model.
Once issued, an instruction is moved to ``IssuedInst`` set until it is ready to
-retire. If ``RetireControlUnit`` is defined in the LLVM's scheduling model,
-:program:`llvm-mca` ensures that instructions are retired in-order. However, an
-instruction is allowed to retire out-of-order if ``RetireOOO`` property is true
-for at least one of its writes.
+retire. :program:`llvm-mca` ensures that writes are committed in-order. However,
+an instruction is allowed to commit writes and retire out-of-order if
+``RetireOOO`` property is true for at least one of its writes.
diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
index 867a6c1df3c5..e3aec7fb78ca 100644
--- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -27,12 +27,10 @@ class MCSubtargetInfo;
namespace mca {
class RegisterFile;
class ResourceManager;
-struct RetireControlUnit;
class InOrderIssueStage final : public Stage {
const MCSchedModel &SM;
const MCSubtargetInfo &STI;
- RetireControlUnit &RCU;
RegisterFile &PRF;
std::unique_ptr<ResourceManager> RM;
@@ -67,14 +65,16 @@ class InOrderIssueStage final : public Stage {
Error tryIssue(InstRef &IR, unsigned *StallCycles);
/// Update status of instructions from IssuedInst.
- Error updateIssuedInst();
+ void updateIssuedInst();
+
+ /// Retire instruction once it is executed.
+ void retireInstruction(InstRef &IR);
public:
- InOrderIssueStage(RetireControlUnit &RCU, RegisterFile &PRF,
- const MCSchedModel &SM, const MCSubtargetInfo &STI)
- : SM(SM), STI(STI), RCU(RCU), PRF(PRF),
- RM(std::make_unique<ResourceManager>(SM)), NumIssued(0),
- StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
+ InOrderIssueStage(RegisterFile &PRF, const MCSchedModel &SM,
+ const MCSubtargetInfo &STI)
+ : SM(SM), STI(STI), PRF(PRF), RM(std::make_unique<ResourceManager>(SM)),
+ NumIssued(0), StallCyclesLeft(0), Bandwidth(0), LastWriteBackCycle(0) {}
bool isAvailable(const InstRef &) const override;
bool hasWorkToComplete() const override;
diff --git a/llvm/include/llvm/MCA/Stages/RetireStage.h b/llvm/include/llvm/MCA/Stages/RetireStage.h
index 27fb9c31d7cd..b635a01db85e 100644
--- a/llvm/include/llvm/MCA/Stages/RetireStage.h
+++ b/llvm/include/llvm/MCA/Stages/RetireStage.h
@@ -30,7 +30,6 @@ class RetireStage final : public Stage {
RetireControlUnit &RCU;
RegisterFile &PRF;
LSUnitBase &LSU;
- SmallVector<InstRef, 4> RetireInst;
RetireStage(const RetireStage &Other) = delete;
RetireStage &operator=(const RetireStage &Other) = delete;
@@ -39,9 +38,7 @@ class RetireStage final : public Stage {
RetireStage(RetireControlUnit &R, RegisterFile &F, LSUnitBase &LS)
: Stage(), RCU(R), PRF(F), LSU(LS) {}
- bool hasWorkToComplete() const override {
- return !RCU.isEmpty() || !RetireInst.empty();
- }
+ bool hasWorkToComplete() const override { return !RCU.isEmpty(); }
Error cycleStart() override;
Error cycleEnd() override;
Error execute(InstRef &IR) override;
diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp
index 250ebebefe7a..8f5addbe6715 100644
--- a/llvm/lib/MCA/Context.cpp
+++ b/llvm/lib/MCA/Context.cpp
@@ -71,23 +71,16 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
std::unique_ptr<Pipeline>
Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr) {
const MCSchedModel &SM = STI.getSchedModel();
- auto RCU = std::make_unique<RetireControlUnit>(SM);
auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
- auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
- Opts.StoreQueueSize, Opts.AssumeNoAlias);
auto Entry = std::make_unique<EntryStage>(SrcMgr);
- auto InOrderIssue = std::make_unique<InOrderIssueStage>(*RCU, *PRF, SM, STI);
- auto Retire = std::make_unique<RetireStage>(*RCU, *PRF, *LSU);
+ auto InOrderIssue = std::make_unique<InOrderIssueStage>(*PRF, SM, STI);
auto StagePipeline = std::make_unique<Pipeline>();
StagePipeline->appendStage(std::move(Entry));
StagePipeline->appendStage(std::move(InOrderIssue));
- StagePipeline->appendStage(std::move(Retire));
- addHardwareUnit(std::move(RCU));
addHardwareUnit(std::move(PRF));
- addHardwareUnit(std::move(LSU));
return StagePipeline;
}
diff --git a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
index 812109f26684..9297f0c4fd7b 100644
--- a/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
+++ b/llvm/lib/MCA/HardwareUnits/RetireControlUnit.cpp
@@ -23,6 +23,8 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
: NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
AvailableEntries(SM.isOutOfOrder() ? SM.MicroOpBufferSize : 0),
MaxRetirePerCycle(0) {
+ assert(SM.isOutOfOrder() &&
+ "RetireControlUnit is not available for in-order processors");
// Check if the scheduling model provides extra information about the machine
// processor. If so, then use that information to set the reorder buffer size
// and the maximum number of instructions retired per cycle.
@@ -33,17 +35,12 @@ RetireControlUnit::RetireControlUnit(const MCSchedModel &SM)
MaxRetirePerCycle = EPI.MaxRetirePerCycle;
}
NumROBEntries = AvailableEntries;
- if (!SM.isOutOfOrder() && !NumROBEntries)
- return;
assert(NumROBEntries && "Invalid reorder buffer size!");
Queue.resize(2 * NumROBEntries);
}
// Reserves a number of slots, and returns a new token.
unsigned RetireControlUnit::dispatch(const InstRef &IR) {
- if (!NumROBEntries)
- return UnhandledTokenID;
-
const Instruction &Inst = *IR.getInstruction();
unsigned Entries = normalizeQuantity(Inst.getNumMicroOps());
assert((AvailableEntries >= Entries) && "Reorder Buffer unavailable!");
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index cf536979578b..2d2a75cc99a7 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -182,7 +182,7 @@ static void addRegisterReadWrite(RegisterFile &PRF, Instruction &IS,
PRF.addRegisterWrite(WriteRef(SourceIndex, &WS), UsedRegs);
}
-static void notifyInstructionExecute(
+static void notifyInstructionIssue(
const InstRef &IR,
const SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &UsedRes,
const Stage &S) {
@@ -205,28 +205,11 @@ static void notifyInstructionDispatch(const InstRef &IR, unsigned Ops,
}
llvm::Error InOrderIssueStage::execute(InstRef &IR) {
- Instruction &IS = *IR.getInstruction();
- const InstrDesc &Desc = IS.getDesc();
-
- unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID;
- if (!Desc.RetireOOO)
- RCUTokenID = RCU.dispatch(IR);
- IS.dispatch(RCUTokenID);
-
- if (Desc.EndGroup) {
- Bandwidth = 0;
- } else {
- unsigned NumMicroOps = IR.getInstruction()->getNumMicroOps();
- assert(Bandwidth >= NumMicroOps);
- Bandwidth -= NumMicroOps;
- }
-
if (llvm::Error E = tryIssue(IR, &StallCyclesLeft))
return E;
if (StallCyclesLeft) {
StalledInst = IR;
- Bandwidth = 0;
}
return llvm::ErrorSuccess();
@@ -235,20 +218,26 @@ llvm::Error InOrderIssueStage::execute(InstRef &IR) {
llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
Instruction &IS = *IR.getInstruction();
unsigned SourceIndex = IR.getSourceIndex();
+ const InstrDesc &Desc = IS.getDesc();
if (!canExecute(IR, StallCycles)) {
LLVM_DEBUG(dbgs() << "[E] Stalled #" << IR << " for " << *StallCycles
<< " cycles\n");
+ Bandwidth = 0;
return llvm::ErrorSuccess();
}
+ unsigned RCUTokenID = RetireControlUnit::UnhandledTokenID;
+ IS.dispatch(RCUTokenID);
+
SmallVector<unsigned, 4> UsedRegs(PRF.getNumRegisterFiles());
addRegisterReadWrite(PRF, IS, SourceIndex, STI, UsedRegs);
- notifyInstructionDispatch(IR, IS.getDesc().NumMicroOps, UsedRegs, *this);
+ unsigned NumMicroOps = IS.getNumMicroOps();
+ notifyInstructionDispatch(IR, NumMicroOps, UsedRegs, *this);
SmallVector<std::pair<ResourceRef, ResourceCycles>, 4> UsedResources;
- RM->issueInstruction(IS.getDesc(), UsedResources);
+ RM->issueInstruction(Desc, UsedResources);
IS.execute(SourceIndex);
// Replace resource masks with valid resource processor IDs.
@@ -256,10 +245,17 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
uint64_t Mask = Use.first.first;
Use.first.first = RM->resolveResourceMask(Mask);
}
- notifyInstructionExecute(IR, UsedResources, *this);
+ notifyInstructionIssue(IR, UsedResources, *this);
+
+ if (Desc.EndGroup) {
+ Bandwidth = 0;
+ } else {
+ assert(Bandwidth >= NumMicroOps);
+ Bandwidth -= NumMicroOps;
+ }
IssuedInst.push_back(IR);
- ++NumIssued;
+ NumIssued += NumMicroOps;
if (!IR.getInstruction()->getDesc().RetireOOO)
LastWriteBackCycle = findLastWriteBackCycle(IR);
@@ -267,7 +263,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR, unsigned *StallCycles) {
return llvm::ErrorSuccess();
}
-llvm::Error InOrderIssueStage::updateIssuedInst() {
+void InOrderIssueStage::updateIssuedInst() {
// Update other instructions. Executed instructions will be retired during the
// next cycle.
unsigned NumExecuted = 0;
@@ -283,29 +279,37 @@ llvm::Error InOrderIssueStage::updateIssuedInst() {
++I;
continue;
}
+
+ PRF.onInstructionExecuted(&IS);
notifyEvent<HWInstructionEvent>(
HWInstructionEvent(HWInstructionEvent::Executed, IR));
-
LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
++NumExecuted;
+
+ retireInstruction(*I);
+
std::iter_swap(I, E - NumExecuted);
}
- // Retire instructions in the next cycle
- if (NumExecuted) {
- for (auto I = IssuedInst.end() - NumExecuted, E = IssuedInst.end(); I != E;
- ++I) {
- if (llvm::Error E = moveToTheNextStage(*I))
- return E;
- }
+ if (NumExecuted)
IssuedInst.resize(IssuedInst.size() - NumExecuted);
- }
+}
- return llvm::ErrorSuccess();
+void InOrderIssueStage::retireInstruction(InstRef &IR) {
+ Instruction &IS = *IR.getInstruction();
+ IS.retire();
+
+ llvm::SmallVector<unsigned, 4> FreedRegs(PRF.getNumRegisterFiles());
+ for (const WriteState &WS : IS.getDefs())
+ PRF.removeRegisterWrite(WS, FreedRegs);
+
+ notifyEvent<HWInstructionEvent>(HWInstructionRetiredEvent(IR, FreedRegs));
+ LLVM_DEBUG(dbgs() << "[E] Retired #" << IR << " \n");
}
llvm::Error InOrderIssueStage::cycleStart() {
NumIssued = 0;
+ Bandwidth = SM.IssueWidth;
PRF.cycleStart();
@@ -313,8 +317,7 @@ llvm::Error InOrderIssueStage::cycleStart() {
SmallVector<ResourceRef, 4> Freed;
RM->cycleEvent(Freed);
- if (llvm::Error E = updateIssuedInst())
- return E;
+ updateIssuedInst();
// Issue instructions scheduled for this cycle
if (!StallCyclesLeft && StalledInst) {
@@ -325,7 +328,6 @@ llvm::Error InOrderIssueStage::cycleStart() {
if (!StallCyclesLeft) {
StalledInst.invalidate();
assert(NumIssued <= SM.IssueWidth && "Overflow.");
- Bandwidth = SM.IssueWidth - NumIssued;
} else {
// The instruction is still stalled, cannot issue any new instructions in
// this cycle.
diff --git a/llvm/lib/MCA/Stages/RetireStage.cpp b/llvm/lib/MCA/Stages/RetireStage.cpp
index 43f71c2e3642..00dbb4b0347a 100644
--- a/llvm/lib/MCA/Stages/RetireStage.cpp
+++ b/llvm/lib/MCA/Stages/RetireStage.cpp
@@ -38,13 +38,6 @@ llvm::Error RetireStage::cycleStart() {
NumRetired++;
}
- // Retire instructions that are not controlled by the RCU
- for (InstRef &IR : RetireInst) {
- IR.getInstruction()->retire();
- notifyInstructionRetired(IR);
- }
- RetireInst.resize(0);
-
return llvm::ErrorSuccess();
}
@@ -58,12 +51,9 @@ llvm::Error RetireStage::execute(InstRef &IR) {
PRF.onInstructionExecuted(&IS);
unsigned TokenID = IS.getRCUTokenID();
- if (TokenID != RetireControlUnit::UnhandledTokenID) {
- RCU.onInstructionExecuted(TokenID);
- return llvm::ErrorSuccess();
- }
+ assert(TokenID != RetireControlUnit::UnhandledTokenID);
+ RCU.onInstructionExecuted(TokenID);
- RetireInst.push_back(IR);
return llvm::ErrorSuccess();
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index ff7766f2caec..0015c27228f6 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -339,5 +339,4 @@ def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
-def A55RCU : RetireControlUnit<64, 0>;
}
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
index be817b755f77..dbcb3c53b22f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-add-sequence.s
@@ -8,12 +8,12 @@ add w1, w0, #4
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 8
-# CHECK-NEXT: Total Cycles: 10
+# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 8
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.80
-# CHECK-NEXT: IPC: 0.80
+# CHECK-NEXT: uOps Per Cycle: 0.89
+# CHECK-NEXT: IPC: 0.89
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
@@ -56,16 +56,16 @@ add w1, w0, #4
# CHECK-NEXT: 1.00 - - - - - - - - - - - add w1, w0, #4
# CHECK: Timeline view:
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345678
-# CHECK: [0,0] DeeER. . add w2, w3, #1
-# CHECK-NEXT: [0,1] DeeER. . add w4, w3, #2, lsl #12
-# CHECK-NEXT: [0,2] .DeeER . add w0, w4, #3
-# CHECK-NEXT: [0,3] . DeeER . add w1, w0, #4
-# CHECK-NEXT: [1,0] . DeeER . add w2, w3, #1
-# CHECK-NEXT: [1,1] . DeeER . add w4, w3, #2, lsl #12
-# CHECK-NEXT: [1,2] . DeeER. add w0, w4, #3
-# CHECK-NEXT: [1,3] . DeeER add w1, w0, #4
+# CHECK: [0,0] DeeE . . add w2, w3, #1
+# CHECK-NEXT: [0,1] DeeE . . add w4, w3, #2, lsl #12
+# CHECK-NEXT: [0,2] .DeeE. . add w0, w4, #3
+# CHECK-NEXT: [0,3] . DeeE . add w1, w0, #4
+# CHECK-NEXT: [1,0] . DeeE . add w2, w3, #1
+# CHECK-NEXT: [1,1] . DeeE . add w4, w3, #2, lsl #12
+# CHECK-NEXT: [1,2] . DeeE. add w0, w4, #3
+# CHECK-NEXT: [1,3] . DeeE add w1, w0, #4
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
index a672c8c879ae..9081fb525ee2 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-stats.s
@@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 21
+# CHECK-NEXT: Total Cycles: 20
# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.67
-# CHECK-NEXT: IPC: 0.57
+# CHECK-NEXT: uOps Per Cycle: 0.70
+# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
# CHECK: Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
+# CHECK-NEXT: RAT - Register unavailable: 8 (40.0%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
@@ -44,33 +44,22 @@ str w0, [x21, x18, lsl #2]
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 11 (52.4%)
-# CHECK-NEXT: 1, 6 (28.6%)
-# CHECK-NEXT: 2, 4 (19.0%)
+# CHECK-NEXT: 0, 10 (50.0%)
+# CHECK-NEXT: 1, 6 (30.0%)
+# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 11 (52.4%)
-# CHECK-NEXT: 1, 6 (28.6%)
-# CHECK-NEXT: 2, 4 (19.0%)
+# CHECK-NEXT: 0, 10 (50.0%)
+# CHECK-NEXT: 1, 6 (30.0%)
+# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
-# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
-# CHECK-NEXT: [# retired], [# cycles]
-# CHECK-NEXT: 0, 14 (66.7%)
-# CHECK-NEXT: 1, 4 (19.0%)
-# CHECK-NEXT: 2, 1 (4.8%)
-# CHECK-NEXT: 3, 2 (9.5%)
-
-# CHECK: Total ROB Entries: 64
-# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% )
-# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
-
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 14
-# CHECK-NEXT: Max number of mappings used: 6
+# CHECK-NEXT: Max number of mappings used: 4
# CHECK: Resources:
# CHECK-NEXT: [0.0] - CortexA55UnitALU
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
index 1d4e41a63c63..d49e68adc1c3 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-all-views.s
@@ -10,12 +10,12 @@ str w0, [x21, x18, lsl #2]
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 21
+# CHECK-NEXT: Total Cycles: 20
# CHECK-NEXT: Total uOps: 14
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.67
-# CHECK-NEXT: IPC: 0.57
+# CHECK-NEXT: uOps Per Cycle: 0.70
+# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 3.5
# CHECK: Instruction Info:
@@ -35,7 +35,7 @@ str w0, [x21, x18, lsl #2]
# CHECK-NEXT: 1 4 1.00 * str w0, [x21, x18, lsl #2]
# CHECK: Dynamic Dispatch Stall Cycles:
-# CHECK-NEXT: RAT - Register unavailable: 8 (38.1%)
+# CHECK-NEXT: RAT - Register unavailable: 8 (40.0%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
@@ -44,33 +44,22 @@ str w0, [x21, x18, lsl #2]
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 11 (52.4%)
-# CHECK-NEXT: 1, 6 (28.6%)
-# CHECK-NEXT: 2, 4 (19.0%)
+# CHECK-NEXT: 0, 10 (50.0%)
+# CHECK-NEXT: 1, 6 (30.0%)
+# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 11 (52.4%)
-# CHECK-NEXT: 1, 6 (28.6%)
-# CHECK-NEXT: 2, 4 (19.0%)
+# CHECK-NEXT: 0, 10 (50.0%)
+# CHECK-NEXT: 1, 6 (30.0%)
+# CHECK-NEXT: 2, 4 (20.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
-# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
-# CHECK-NEXT: [# retired], [# cycles]
-# CHECK-NEXT: 0, 14 (66.7%)
-# CHECK-NEXT: 1, 4 (19.0%)
-# CHECK-NEXT: 2, 1 (4.8%)
-# CHECK-NEXT: 3, 2 (9.5%)
-
-# CHECK: Total ROB Entries: 64
-# CHECK-NEXT: Max Used ROB Entries: 6 ( 9.4% )
-# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
-
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 14
-# CHECK-NEXT: Max number of mappings used: 6
+# CHECK-NEXT: Max number of mappings used: 4
# CHECK: Resources:
# CHECK-NEXT: [0.0] - CortexA55UnitALU
@@ -101,20 +90,20 @@ str w0, [x21, x18, lsl #2]
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 0
-
-# CHECK: [0,0] DeeER. . . . ldr w4, [x2], #4
-# CHECK-NEXT: [0,1] .DeeER . . . ldr w5, [x3]
-# CHECK-NEXT: [0,2] . DeeeER. . . madd w0, w5, w4, w0
-# CHECK-NEXT: [0,3] . DeeER. . . add x3, x3, x13
-# CHECK-NEXT: [0,4] . DeeER. . . subs x1, x1, #1
-# CHECK-NEXT: [0,5] . . DeeeER . . str w0, [x21, x18, lsl #2]
-# CHECK-NEXT: [1,0] . . DeeER . . ldr w4, [x2], #4
-# CHECK-NEXT: [1,1] . . DeeER . . ldr w5, [x3]
-# CHECK-NEXT: [1,2] . . . DeeeER . madd w0, w5, w4, w0
-# CHECK-NEXT: [1,3] . . . DeeER . add x3, x3, x13
-# CHECK-NEXT: [1,4] . . . DeeER . subs x1, x1, #1
-# CHECK-NEXT: [1,5] . . . DeeeER str w0, [x21, x18, lsl #2]
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeeE . . . . ldr w4, [x2], #4
+# CHECK-NEXT: [0,1] .DeeE. . . . ldr w5, [x3]
+# CHECK-NEXT: [0,2] . DeeeE . . . madd w0, w5, w4, w0
+# CHECK-NEXT: [0,3] . DeeE . . . add x3, x3, x13
+# CHECK-NEXT: [0,4] . DeeE . . . subs x1, x1, #1
+# CHECK-NEXT: [0,5] . . DeeeE . . str w0, [x21, x18, lsl #2]
+# CHECK-NEXT: [1,0] . . DeeE . . ldr w4, [x2], #4
+# CHECK-NEXT: [1,1] . . DeeE . . ldr w5, [x3]
+# CHECK-NEXT: [1,2] . . . DeeeE . madd w0, w5, w4, w0
+# CHECK-NEXT: [1,3] . . . DeeE . add x3, x3, x13
+# CHECK-NEXT: [1,4] . . . DeeE . subs x1, x1, #1
+# CHECK-NEXT: [1,5] . . . DeeeE str w0, [x21, x18, lsl #2]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
index de5dbaa3490c..c35332420549 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-in-order-retire.s
@@ -10,12 +10,12 @@ add w7, w9, w0
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 20
+# CHECK-NEXT: Total Cycles: 19
# CHECK-NEXT: Total uOps: 12
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.60
-# CHECK-NEXT: IPC: 0.60
+# CHECK-NEXT: uOps Per Cycle: 0.63
+# CHECK-NEXT: IPC: 0.63
# CHECK-NEXT: Block RThroughput: 8.0
# CHECK: Instruction Info:
@@ -40,37 +40,26 @@ add w7, w9, w0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
-# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.0%)
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 1 (5.3%)
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 12 (60.0%)
-# CHECK-NEXT: 1, 4 (20.0%)
-# CHECK-NEXT: 2, 4 (20.0%)
+# CHECK-NEXT: 0, 11 (57.9%)
+# CHECK-NEXT: 1, 4 (21.1%)
+# CHECK-NEXT: 2, 4 (21.1%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 12 (60.0%)
-# CHECK-NEXT: 1, 4 (20.0%)
-# CHECK-NEXT: 2, 4 (20.0%)
+# CHECK-NEXT: 0, 11 (57.9%)
+# CHECK-NEXT: 1, 4 (21.1%)
+# CHECK-NEXT: 2, 4 (21.1%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
-# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
-# CHECK-NEXT: [# retired], [# cycles]
-# CHECK-NEXT: 0, 14 (70.0%)
-# CHECK-NEXT: 1, 2 (10.0%)
-# CHECK-NEXT: 2, 2 (10.0%)
-# CHECK-NEXT: 3, 2 (10.0%)
-
-# CHECK: Total ROB Entries: 64
-# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
-# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
-
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 12
-# CHECK-NEXT: Max number of mappings used: 7
+# CHECK-NEXT: Max number of mappings used: 6
# CHECK: Resources:
# CHECK-NEXT: [0.0] - CortexA55UnitALU
@@ -100,21 +89,21 @@ add w7, w9, w0
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - add w7, w9, w0
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456789
+# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeER. . . sdiv w12, w21, w0
-# CHECK-NEXT: [0,1] . DeeER. . . add w8, w8, #1
-# CHECK-NEXT: [0,2] . DeeER. . . add w1, w2, w0
-# CHECK-NEXT: [0,3] . .DeeER . . add w3, w4, #1
-# CHECK-NEXT: [0,4] . .DeeER . . add w5, w6, w0
-# CHECK-NEXT: [0,5] . . DeeER . . add w7, w9, w0
-# CHECK-NEXT: [1,0] . . DeeeeeeeER . sdiv w12, w21, w0
-# CHECK-NEXT: [1,1] . . . DeeER . add w8, w8, #1
-# CHECK-NEXT: [1,2] . . . DeeER . add w1, w2, w0
-# CHECK-NEXT: [1,3] . . . DeeER. add w3, w4, #1
-# CHECK-NEXT: [1,4] . . . DeeER. add w5, w6, w0
-# CHECK-NEXT: [1,5] . . . DeeER add w7, w9, w0
+# CHECK: [0,0] DeeeeeeeE . . . sdiv w12, w21, w0
+# CHECK-NEXT: [0,1] . DeeE . . . add w8, w8, #1
+# CHECK-NEXT: [0,2] . DeeE . . . add w1, w2, w0
+# CHECK-NEXT: [0,3] . .DeeE. . . add w3, w4, #1
+# CHECK-NEXT: [0,4] . .DeeE. . . add w5, w6, w0
+# CHECK-NEXT: [0,5] . . DeeE . . add w7, w9, w0
+# CHECK-NEXT: [1,0] . . DeeeeeeeE . sdiv w12, w21, w0
+# CHECK-NEXT: [1,1] . . . DeeE . add w8, w8, #1
+# CHECK-NEXT: [1,2] . . . DeeE . add w1, w2, w0
+# CHECK-NEXT: [1,3] . . . DeeE. add w3, w4, #1
+# CHECK-NEXT: [1,4] . . . DeeE. add w5, w6, w0
+# CHECK-NEXT: [1,5] . . . DeeE add w7, w9, w0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
index 6231116f25ac..8935d254cd98 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-out-of-order-retire.s
@@ -10,12 +10,12 @@ add w7, w9, w0
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 12
-# CHECK-NEXT: Total Cycles: 25
+# CHECK-NEXT: Total Cycles: 24
# CHECK-NEXT: Total uOps: 12
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.48
-# CHECK-NEXT: IPC: 0.48
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Instruction Info:
@@ -40,31 +40,21 @@ add w7, w9, w0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
-# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (28.0%)
+# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 7 (29.2%)
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
-# CHECK-NEXT: 0, 19 (76.0%)
-# CHECK-NEXT: 2, 6 (24.0%)
+# CHECK-NEXT: 0, 18 (75.0%)
+# CHECK-NEXT: 2, 6 (25.0%)
# CHECK: Schedulers - number of cycles where we saw N micro opcodes issued:
# CHECK-NEXT: [# issued], [# cycles]
-# CHECK-NEXT: 0, 19 (76.0%)
-# CHECK-NEXT: 2, 6 (24.0%)
+# CHECK-NEXT: 0, 18 (75.0%)
+# CHECK-NEXT: 2, 6 (25.0%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: No scheduler resources used.
-# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
-# CHECK-NEXT: [# retired], [# cycles]
-# CHECK-NEXT: 0, 18 (72.0%)
-# CHECK-NEXT: 1, 2 (8.0%)
-# CHECK-NEXT: 2, 5 (20.0%)
-
-# CHECK: Total ROB Entries: 64
-# CHECK-NEXT: Max Used ROB Entries: 7 ( 10.9% )
-# CHECK-NEXT: Average Used ROB Entries per cy: 2 ( 3.1% )
-
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 12
# CHECK-NEXT: Max number of mappings used: 7
@@ -98,20 +88,20 @@ add w7, w9, w0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
-# CHECK-NEXT: Index 0123456789 01234
-
-# CHECK: [0,0] DeeeeeeeeeeeeER. . . fdiv s1, s2, s3
-# CHECK-NEXT: [0,1] DeeER. . . . . add w8, w8, #1
-# CHECK-NEXT: [0,2] .DeeER . . . . add w1, w2, w0
-# CHECK-NEXT: [0,3] .DeeER . . . . add w3, w4, #1
-# CHECK-NEXT: [0,4] . DeeER . . . . add w5, w6, w0
-# CHECK-NEXT: [0,5] . DeeER . . . . add w7, w9, w0
-# CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeER fdiv s1, s2, s3
-# CHECK-NEXT: [1,1] . . DeeER. . . add w8, w8, #1
-# CHECK-NEXT: [1,2] . . .DeeER . . add w1, w2, w0
-# CHECK-NEXT: [1,3] . . .DeeER . . add w3, w4, #1
-# CHECK-NEXT: [1,4] . . . DeeER . . add w5, w6, w0
-# CHECK-NEXT: [1,5] . . . DeeER . . add w7, w9, w0
+# CHECK-NEXT: Index 0123456789 0123
+
+# CHECK: [0,0] DeeeeeeeeeeeeE . . . fdiv s1, s2, s3
+# CHECK-NEXT: [0,1] DeeE . . . . . add w8, w8, #1
+# CHECK-NEXT: [0,2] .DeeE. . . . . add w1, w2, w0
+# CHECK-NEXT: [0,3] .DeeE. . . . . add w3, w4, #1
+# CHECK-NEXT: [0,4] . DeeE . . . . add w5, w6, w0
+# CHECK-NEXT: [0,5] . DeeE . . . . add w7, w9, w0
+# CHECK-NEXT: [1,0] . . DeeeeeeeeeeeeE fdiv s1, s2, s3
+# CHECK-NEXT: [1,1] . . DeeE . . . add w8, w8, #1
+# CHECK-NEXT: [1,2] . . .DeeE. . . add w1, w2, w0
+# CHECK-NEXT: [1,3] . . .DeeE. . . add w3, w4, #1
+# CHECK-NEXT: [1,4] . . . DeeE . . add w5, w6, w0
+# CHECK-NEXT: [1,5] . . . DeeE . . add w7, w9, w0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s
index 64ee3bdc8355..be57731389ba 100644
--- a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s
+++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-add-sequence.s
@@ -7,12 +7,12 @@ v_add_f32 v2, v1, v0
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
-# CHECK-NEXT: Total Cycles: 13
+# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 1
-# CHECK-NEXT: uOps Per Cycle: 0.23
-# CHECK-NEXT: IPC: 0.23
+# CHECK-NEXT: uOps Per Cycle: 0.25
+# CHECK-NEXT: IPC: 0.25
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
@@ -48,12 +48,12 @@ v_add_f32 v2, v1, v0
# CHECK-NEXT: - - - 1.00 - 1.00 - v_add_f32_e32 v2, v1, v0
# CHECK: Timeline view:
-# CHECK-NEXT: 012
+# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . . v_add_f32_e32 v0, v0, v0
-# CHECK-NEXT: [0,1] .DeeeeER . . v_add_f32_e32 v1, v1, v1
-# CHECK-NEXT: [0,2] . .DeeeeER v_add_f32_e32 v2, v1, v0
+# CHECK: [0,0] DeeeeE .. v_add_f32_e32 v0, v0, v0
+# CHECK-NEXT: [0,1] .DeeeeE .. v_add_f32_e32 v1, v1, v1
+# CHECK-NEXT: [0,2] . .DeeeeE v_add_f32_e32 v2, v1, v0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s
index 906ce86b98e5..28d811f01806 100644
--- a/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s
+++ b/llvm/test/tools/llvm-mca/AMDGPU/gfx10-double.s
@@ -42,7 +42,7 @@ v_sqrt_f64 v[4:5], v[4:5]
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 27
-# CHECK-NEXT: Total Cycles: 205
+# CHECK-NEXT: Total Cycles: 204
# CHECK-NEXT: Total uOps: 27
# CHECK: Dispatch Width: 1
@@ -134,19 +134,19 @@ v_sqrt_f64 v[4:5], v[4:5]
# CHECK-NEXT: 0123456789 0123456789 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
-# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeER. . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
-# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
-# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
-# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
-# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
-# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
-# CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeER . . . . . v_fract_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER . v_trunc_f64_e32 v[0:1], v[0:1]
-# CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER . v_ceil_f64_e32 v[2:3], v[2:3]
-# CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeER. v_rndne_f64_e32 v[4:5], v[4:5]
-# CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeER v_floor_f64_e32 v[6:7], v[6:7]
+# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_cvt_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . . v_cvt_f64_i32_e32 v[2:3], v2
+# CHECK-NEXT: [0,2] . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . . . . . v_cvt_f32_f64_e32 v4, v[4:5]
+# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_f64_f32_e32 v[6:7], v6
+# CHECK-NEXT: [0,4] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_u32_f64_e32 v8, v[8:9]
+# CHECK-NEXT: [0,5] . DeeeeeeeeeeeeeeeeeeeeeE . . . . . . . . . v_cvt_f64_u32_e32 v[10:11], v10
+# CHECK-NEXT: [0,6] . . . . . DeeeeeeeeeeeeeeeeeeeeeE. . . . . . v_frexp_exp_i32_f64_e32 v0, v[0:1]
+# CHECK-NEXT: [0,7] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . v_frexp_mant_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: [0,8] . . . . . DeeeeeeeeeeeeeeeeeeeeeE . . . . . v_fract_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: [0,9] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . v_trunc_f64_e32 v[0:1], v[0:1]
+# CHECK-NEXT: [0,10] . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE . v_ceil_f64_e32 v[2:3], v[2:3]
+# CHECK-NEXT: [0,11] . . . . . . . . . .DeeeeeeeeeeeeeeeeeeeeeE . v_rndne_f64_e32 v[4:5], v[4:5]
+# CHECK-NEXT: [0,12] . . . . . . . . . . DeeeeeeeeeeeeeeeeeeeeeE. v_floor_f64_e32 v[6:7], v[6:7]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s b/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
index f195c069ef15..ad9a2b1ad0c2 100644
--- a/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
+++ b/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
@@ -9,12 +9,12 @@ vldr d0, [r1]
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
-# CHECK-NEXT: Total Cycles: 7
+# CHECK-NEXT: Total Cycles: 6
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 2
-# CHECK-NEXT: uOps Per Cycle: 0.43
-# CHECK-NEXT: IPC: 0.43
+# CHECK-NEXT: uOps Per Cycle: 0.50
+# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 1.5
# CHECK: Instruction Info:
@@ -56,11 +56,11 @@ vldr d0, [r1]
# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
# CHECK: Timeline view:
-# CHECK-NEXT: Index 0123456
+# CHECK-NEXT: Index 012345
-# CHECK: [0,0] DER .. add.w r1, r1, #1
-# CHECK-NEXT: [0,1] .DER .. add.w r1, r1, #2
-# CHECK-NEXT: [0,2] . DeER vldr d0, [r1]
+# CHECK: [0,0] DE . add.w r1, r1, #1
+# CHECK-NEXT: [0,1] .DE . add.w r1, r1, #2
+# CHECK-NEXT: [0,2] . DeE vldr d0, [r1]
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index c8b481bc7ce6..ceeb267cf119 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -77,8 +77,10 @@ void TimelineView::onEvent(const HWInstructionEvent &Event) {
"Instruction cannot be ready if it hasn't been dispatched yet!");
WTEntry.CyclesSpentInSQWhileReady +=
TVEntry.CycleIssued - TVEntry.CycleReady;
- WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
- (CurrentCycle - 1) - TVEntry.CycleExecuted;
+ if (CurrentCycle > TVEntry.CycleExecuted) {
+ WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
+ (CurrentCycle - 1) - TVEntry.CycleExecuted;
+ }
break;
}
case HWInstructionEvent::Ready:
@@ -243,7 +245,8 @@ void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
OS << TimelineView::DisplayChar::RetireLag;
- OS << TimelineView::DisplayChar::Retired;
+ if (Entry.CycleExecuted < Entry.CycleRetired)
+ OS << TimelineView::DisplayChar::Retired;
// Skip other columns.
for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 830a619d2e32..0e0a39883f93 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -278,7 +278,8 @@ static void processViewOptions(bool IsOutOfOrder) {
processOptionImpl(PrintRegisterFileStats, Default);
processOptionImpl(PrintDispatchStats, Default);
processOptionImpl(PrintSchedulerStats, Default);
- processOptionImpl(PrintRetireStats, Default);
+ if (IsOutOfOrder)
+ processOptionImpl(PrintRetireStats, Default);
}
// Returns true on success.
More information about the llvm-commits
mailing list