[llvm] [WIP][RFC] Teach MCA that constant registers do not create dependencies (PR #89387)
Rin Dobrescu via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 03:33:27 PDT 2024
https://github.com/Rin18 updated https://github.com/llvm/llvm-project/pull/89387
>From 7d29ddb340ecae3eaf0ab7382efd26388daae1f7 Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Thu, 18 Apr 2024 13:09:52 +0000
Subject: [PATCH 1/4] Precommit test
---
.../AArch64/Neoverse/V1-zero-dependency.s | 77 +++++++++++++++++++
1 file changed, 77 insertions(+)
create mode 100644 llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
new file mode 100644
index 00000000000000..ac3b6d55272a8c
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -0,0 +1,77 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 --timeline --timeline-max-iterations=4 < %s | FileCheck %s
+
+mov x0, x1
+cmp x0, #4
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 203
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 0.99
+# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.25 mov x0, x1
+# CHECK-NEXT: 1 1 0.33 cmp x0, #4
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V1UnitB
+# CHECK-NEXT: [0.1] - V1UnitB
+# CHECK-NEXT: [1.0] - V1UnitD
+# CHECK-NEXT: [1.1] - V1UnitD
+# CHECK-NEXT: [2] - V1UnitL2
+# CHECK-NEXT: [3.0] - V1UnitL01
+# CHECK-NEXT: [3.1] - V1UnitL01
+# CHECK-NEXT: [4] - V1UnitM0
+# CHECK-NEXT: [5] - V1UnitM1
+# CHECK-NEXT: [6.0] - V1UnitS
+# CHECK-NEXT: [6.1] - V1UnitS
+# CHECK-NEXT: [7] - V1UnitV0
+# CHECK-NEXT: [8] - V1UnitV1
+# CHECK-NEXT: [9] - V1UnitV2
+# CHECK-NEXT: [10] - V1UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10]
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10] Instructions:
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - mov x0, x1
+# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - cmp x0, #4
+
+# CHECK: Timeline view:
+# CHECK-NEXT: 0
+# CHECK-NEXT: Index 0123456789
+
+# CHECK: [0,0] DeER . . mov x0, x1
+# CHECK-NEXT: [0,1] D=eER. . cmp x0, #4
+# CHECK-NEXT: [1,0] D==eER . mov x0, x1
+# CHECK-NEXT: [1,1] D===eER . cmp x0, #4
+# CHECK-NEXT: [2,0] D====eER . mov x0, x1
+# CHECK-NEXT: [2,1] D=====eER . cmp x0, #4
+# CHECK-NEXT: [3,0] D======eER. mov x0, x1
+# CHECK-NEXT: [3,1] D=======eER cmp x0, #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 4.0 0.3 0.0 mov x0, x1
+# CHECK-NEXT: 1. 4 5.0 0.0 0.0 cmp x0, #4
+# CHECK-NEXT: 4 4.5 0.1 0.0 <total>
>From f6cc74659d121b64a10fb8c87baa615670c456cb Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Fri, 19 Apr 2024 14:11:37 +0000
Subject: [PATCH 2/4] Add constant register check.
---
llvm/include/llvm/MC/MCRegisterInfo.h | 7 ++++
llvm/lib/MCA/InstrBuilder.cpp | 17 +++++++--
.../AArch64/Neoverse/V1-zero-dependency.s | 37 +++++++++----------
llvm/utils/TableGen/RegisterInfoEmitter.cpp | 3 +-
4 files changed, 41 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index c648ef20fa84c6..4dbd4435a8c719 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -126,6 +126,9 @@ struct MCRegisterDesc {
/// Index into list with lane mask sequences. The sequence contains a lanemask
/// for every register unit.
uint16_t RegUnitLaneMasks;
+
+ // Returns true for constant registers
+ bool Constant;
};
/// MCRegisterInfo base class - We assume that the target defines a static
@@ -382,6 +385,10 @@ class MCRegisterInfo {
return RegStrings + get(RegNo).Name;
}
+ bool isConstant(MCRegister RegNo) const {
+ return get(RegNo).Constant && MCRegister::isPhysicalRegister(RegNo.id());
+ }
+
/// Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
unsigned getNumRegs() const {
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 1a82e45763a267..ad558dd7cf858a 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -320,9 +320,9 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
ID.Writes.resize(TotalDefs + NumVariadicOps);
- // Iterate over the operands list, and skip non-register operands.
- // The first NumExplicitDefs register operands are expected to be register
- // definitions.
+ // Iterate over the operands list, and skip non-register or constant register
+ // operands. The first NumExplicitDefs register operands are expected to be
+ // register definitions.
unsigned CurrentDef = 0;
unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
unsigned i = 0;
@@ -335,6 +335,10 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
OptionalDefIdx = CurrentDef++;
continue;
}
+ if (MRI.isConstant(Op.getReg())) {
+ CurrentDef++;
+ continue;
+ }
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = i;
@@ -413,6 +417,8 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
const MCOperand &Op = MCI.getOperand(OpIndex);
if (!Op.isReg())
continue;
+ if (MRI.isConstant(Op.getReg()))
+ continue;
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = OpIndex;
@@ -449,6 +455,9 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
if (!Op.isReg())
continue;
+ if (MRI.isConstant(Op.getReg()))
+ continue;
+
ReadDescriptor &Read = ID.Reads[CurrentUse];
Read.OpIndex = OpIndex;
Read.UseIndex = I;
@@ -465,6 +474,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
Read.OpIndex = ~I;
Read.UseIndex = NumExplicitUses + I;
Read.RegisterID = MCDesc.implicit_uses()[I];
+ if (MRI.isConstant(Read.RegisterID))
+ continue;
Read.SchedClassID = SchedClassID;
LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
<< ", UseIndex=" << Read.UseIndex << ", RegisterID="
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
index ac3b6d55272a8c..071329fd00cdd2 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -6,12 +6,12 @@ cmp x0, #4
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
-# CHECK-NEXT: Total Cycles: 203
+# CHECK-NEXT: Total Cycles: 54
# CHECK-NEXT: Total uOps: 200
# CHECK: Dispatch Width: 15
-# CHECK-NEXT: uOps Per Cycle: 0.99
-# CHECK-NEXT: IPC: 0.99
+# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: IPC: 3.70
# CHECK-NEXT: Block RThroughput: 0.3
# CHECK: Instruction Info:
@@ -45,25 +45,24 @@ cmp x0, #4
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10]
-# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - -
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10] Instructions:
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - mov x0, x1
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - cmp x0, #4
+# CHECK-NEXT: - - - - - - - 0.48 0.50 0.01 0.01 - - - - mov x0, x1
+# CHECK-NEXT: - - - - - - - 0.02 - 0.49 0.49 - - - - cmp x0, #4
# CHECK: Timeline view:
-# CHECK-NEXT: 0
-# CHECK-NEXT: Index 0123456789
+# CHECK-NEXT: Index 012345
-# CHECK: [0,0] DeER . . mov x0, x1
-# CHECK-NEXT: [0,1] D=eER. . cmp x0, #4
-# CHECK-NEXT: [1,0] D==eER . mov x0, x1
-# CHECK-NEXT: [1,1] D===eER . cmp x0, #4
-# CHECK-NEXT: [2,0] D====eER . mov x0, x1
-# CHECK-NEXT: [2,1] D=====eER . cmp x0, #4
-# CHECK-NEXT: [3,0] D======eER. mov x0, x1
-# CHECK-NEXT: [3,1] D=======eER cmp x0, #4
+# CHECK: [0,0] DeER . mov x0, x1
+# CHECK-NEXT: [0,1] D=eER. cmp x0, #4
+# CHECK-NEXT: [1,0] DeE-R. mov x0, x1
+# CHECK-NEXT: [1,1] D=eER. cmp x0, #4
+# CHECK-NEXT: [2,0] DeE-R. mov x0, x1
+# CHECK-NEXT: [2,1] D=eER. cmp x0, #4
+# CHECK-NEXT: [3,0] DeE-R. mov x0, x1
+# CHECK-NEXT: [3,1] D==eER cmp x0, #4
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -72,6 +71,6 @@ cmp x0, #4
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 4 4.0 0.3 0.0 mov x0, x1
-# CHECK-NEXT: 1. 4 5.0 0.0 0.0 cmp x0, #4
-# CHECK-NEXT: 4 4.5 0.1 0.0 <total>
+# CHECK-NEXT: 0. 4 1.0 1.0 0.8 mov x0, x1
+# CHECK-NEXT: 1. 4 2.3 0.3 0.0 cmp x0, #4
+# CHECK-NEXT: 4 1.6 0.6 0.4 <total>
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index ee8830edeedbbe..f6f93154c454d3 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -977,7 +977,8 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
<< DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
<< ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
<< (Offset << RegUnitBits | FirstRU) << ", "
- << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
+ << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << ", " << Reg.Constant
+ << " },\n";
++i;
}
OS << "};\n\n"; // End of register descriptors...
>From b9a89f6be46f562b828a8c22a915259015889a75 Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Mon, 22 Apr 2024 15:57:06 +0000
Subject: [PATCH 3/4] Fix test, add check and address comments.
---
llvm/include/llvm/MC/MCRegisterInfo.h | 9 ++++---
llvm/lib/MCA/InstrBuilder.cpp | 3 +--
.../AArch64/HiSilicon/tsv110-forwarding.s | 24 +++++++++----------
3 files changed, 17 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index 4dbd4435a8c719..af5be9186108af 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -127,8 +127,8 @@ struct MCRegisterDesc {
/// for every register unit.
uint16_t RegUnitLaneMasks;
- // Returns true for constant registers
- bool Constant;
+ // Is true for constant registers.
+ bool IsConstant;
};
/// MCRegisterInfo base class - We assume that the target defines a static
@@ -385,9 +385,8 @@ class MCRegisterInfo {
return RegStrings + get(RegNo).Name;
}
- bool isConstant(MCRegister RegNo) const {
- return get(RegNo).Constant && MCRegister::isPhysicalRegister(RegNo.id());
- }
+ /// Returns true if the given register is constant.
+ bool isConstant(MCRegister RegNo) const { return get(RegNo).IsConstant; }
/// Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index ad558dd7cf858a..8a3d653f32e47c 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -454,7 +454,6 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
const MCOperand &Op = MCI.getOperand(OpIndex);
if (!Op.isReg())
continue;
-
if (MRI.isConstant(Op.getReg()))
continue;
@@ -760,7 +759,7 @@ InstrBuilder::createInstruction(const MCInst &MCI,
RegID = WD.isImplicitWrite() ? WD.RegisterID
: MCI.getOperand(WD.OpIndex).getReg();
// Check if this is a optional definition that references NoReg.
- if (WD.IsOptionalDef && !RegID) {
+ if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
++WriteIndex;
continue;
}
diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
index 207822b618396e..b29697ea7972b1 100644
--- a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
@@ -52,22 +52,22 @@ madd x0, x0, x0, x0
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 4
-# CHECK-NEXT: Total Cycles: 13
+# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.31
-# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
-# CHECK-NEXT: 012
+# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . . mul x0, x1, x2
-# CHECK-NEXT: [0,1] D=eeeeER . . madd x0, x1, x2, x0
-# CHECK-NEXT: [0,2] D==eeeeER . . madd x0, x1, x2, x0
-# CHECK-NEXT: [0,3] D======eeeeER madd x0, x0, x0, x0
+# CHECK: [0,0] DeeeeER .. mul x0, x1, x2
+# CHECK-NEXT: [0,1] D==eeeeER .. madd x0, x1, x2, x0
+# CHECK-NEXT: [0,2] D=eeeeE-R .. madd x0, x1, x2, x0
+# CHECK-NEXT: [0,3] D=====eeeeER madd x0, x0, x0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -77,7 +77,7 @@ madd x0, x0, x0, x0
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mul x0, x1, x2
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 madd x0, x1, x2, x0
-# CHECK-NEXT: 2. 1 3.0 0.0 0.0 madd x0, x1, x2, x0
-# CHECK-NEXT: 3. 1 7.0 0.0 0.0 madd x0, x0, x0, x0
-# CHECK-NEXT: 1 3.3 0.3 0.0 <total>
+# CHECK-NEXT: 1. 1 3.0 3.0 0.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 2. 1 2.0 2.0 1.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 madd x0, x0, x0, x0
+# CHECK-NEXT: 1 3.0 1.5 0.3 <total>
>From a7573bc0be7886a62ebc43130d6af831ffbff728 Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Tue, 23 Apr 2024 10:30:28 +0000
Subject: [PATCH 4/4] Update print line before emitting register descriptors.
---
llvm/utils/TableGen/RegisterInfoEmitter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index f6f93154c454d3..980d9a39636ea7 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -962,7 +962,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "extern const MCRegisterDesc " << TargetName
<< "RegDesc[] = { // Descriptors\n";
- OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";
+ OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0, 0 },\n";
// Emit the register descriptors now.
i = 0;
More information about the llvm-commits mailing list