[llvm] 385f59f - [llvm-mca] Teach MCA constant registers do not create dependencies (#89387)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 02:30:26 PDT 2024
Author: Rin Dobrescu
Date: 2024-05-03T10:30:22+01:00
New Revision: 385f59f9f570b77bf8bd636ba2f65a08a7227499
URL: https://github.com/llvm/llvm-project/commit/385f59f9f570b77bf8bd636ba2f65a08a7227499
DIFF: https://github.com/llvm/llvm-project/commit/385f59f9f570b77bf8bd636ba2f65a08a7227499.diff
LOG: [llvm-mca] Teach MCA constant registers do not create dependencies (#89387)
Constant registers such as the zero registers XZR and WZR are treated
like any other register by LLVM-MCA. This can create non-existent
dependency chains.
Currently there is no way for MCA to query whether a register is constant.
This patch fixes the issue by adding a bool IsConstant field to
MCRegisterDesc that is true for constant registers, together with an
MCRegisterInfo::isConstant() accessor. Since constant registers do not
create dependencies, it makes sense to add this check to MCA: InstrBuilder
now skips constant registers when populating reads and writes.
Added:
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
Modified:
llvm/include/llvm/MC/MCRegisterInfo.h
llvm/lib/MCA/InstrBuilder.cpp
llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
llvm/utils/TableGen/RegisterInfoEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index c648ef20fa84c6..af5be9186108af 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -126,6 +126,9 @@ struct MCRegisterDesc {
/// Index into list with lane mask sequences. The sequence contains a lanemask
/// for every register unit.
uint16_t RegUnitLaneMasks;
+
+ // Is true for constant registers.
+ bool IsConstant;
};
/// MCRegisterInfo base class - We assume that the target defines a static
@@ -382,6 +385,9 @@ class MCRegisterInfo {
return RegStrings + get(RegNo).Name;
}
+ /// Returns true if the given register is constant.
+ bool isConstant(MCRegister RegNo) const { return get(RegNo).IsConstant; }
+
/// Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
unsigned getNumRegs() const {
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 2e3ebe3d9073bd..bcf065c5669188 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -320,9 +320,9 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
ID.Writes.resize(TotalDefs + NumVariadicOps);
- // Iterate over the operands list, and skip non-register operands.
- // The first NumExplicitDefs register operands are expected to be register
- // definitions.
+ // Iterate over the operands list, and skip non-register or constant register
+ // operands. The first NumExplicitDefs register operands are expected to be
+ // register definitions.
unsigned CurrentDef = 0;
unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
unsigned i = 0;
@@ -335,6 +335,10 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
OptionalDefIdx = CurrentDef++;
continue;
}
+ if (MRI.isConstant(Op.getReg())) {
+ CurrentDef++;
+ continue;
+ }
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = i;
@@ -413,6 +417,8 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
const MCOperand &Op = MCI.getOperand(OpIndex);
if (!Op.isReg())
continue;
+ if (MRI.isConstant(Op.getReg()))
+ continue;
WriteDescriptor &Write = ID.Writes[CurrentDef];
Write.OpIndex = OpIndex;
@@ -448,6 +454,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
const MCOperand &Op = MCI.getOperand(OpIndex);
if (!Op.isReg())
continue;
+ if (MRI.isConstant(Op.getReg()))
+ continue;
ReadDescriptor &Read = ID.Reads[CurrentUse];
Read.OpIndex = OpIndex;
@@ -465,6 +473,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
Read.OpIndex = ~I;
Read.UseIndex = NumExplicitUses + I;
Read.RegisterID = MCDesc.implicit_uses()[I];
+ if (MRI.isConstant(Read.RegisterID))
+ continue;
Read.SchedClassID = SchedClassID;
LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
<< ", UseIndex=" << Read.UseIndex << ", RegisterID="
@@ -747,8 +757,9 @@ InstrBuilder::createInstruction(const MCInst &MCI,
for (const WriteDescriptor &WD : D.Writes) {
RegID = WD.isImplicitWrite() ? WD.RegisterID
: MCI.getOperand(WD.OpIndex).getReg();
- // Check if this is a optional definition that references NoReg.
- if (WD.IsOptionalDef && !RegID) {
+ // Check if this is a optional definition that references NoReg or a write
+ // to a constant register.
+ if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
++WriteIndex;
continue;
}
diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
index 207822b618396e..b29697ea7972b1 100644
--- a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
@@ -52,22 +52,22 @@ madd x0, x0, x0, x0
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 4
-# CHECK-NEXT: Total Cycles: 13
+# CHECK-NEXT: Total Cycles: 12
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 4
-# CHECK-NEXT: uOps Per Cycle: 0.31
-# CHECK-NEXT: IPC: 0.31
+# CHECK-NEXT: uOps Per Cycle: 0.33
+# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
-# CHECK-NEXT: 012
+# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeER . . mul x0, x1, x2
-# CHECK-NEXT: [0,1] D=eeeeER . . madd x0, x1, x2, x0
-# CHECK-NEXT: [0,2] D==eeeeER . . madd x0, x1, x2, x0
-# CHECK-NEXT: [0,3] D======eeeeER madd x0, x0, x0, x0
+# CHECK: [0,0] DeeeeER .. mul x0, x1, x2
+# CHECK-NEXT: [0,1] D==eeeeER .. madd x0, x1, x2, x0
+# CHECK-NEXT: [0,2] D=eeeeE-R .. madd x0, x1, x2, x0
+# CHECK-NEXT: [0,3] D=====eeeeER madd x0, x0, x0, x0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -77,7 +77,7 @@ madd x0, x0, x0, x0
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mul x0, x1, x2
-# CHECK-NEXT: 1. 1 2.0 0.0 0.0 madd x0, x1, x2, x0
-# CHECK-NEXT: 2. 1 3.0 0.0 0.0 madd x0, x1, x2, x0
-# CHECK-NEXT: 3. 1 7.0 0.0 0.0 madd x0, x0, x0, x0
-# CHECK-NEXT: 1 3.3 0.3 0.0 <total>
+# CHECK-NEXT: 1. 1 3.0 3.0 0.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 2. 1 2.0 2.0 1.0 madd x0, x1, x2, x0
+# CHECK-NEXT: 3. 1 6.0 0.0 0.0 madd x0, x0, x0, x0
+# CHECK-NEXT: 1 3.0 1.5 0.3 <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
new file mode 100644
index 00000000000000..071329fd00cdd2
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -0,0 +1,76 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 --timeline --timeline-max-iterations=4 < %s | FileCheck %s
+
+mov x0, x1
+cmp x0, #4
+
+# CHECK: Iterations: 100
+# CHECK-NEXT: Instructions: 200
+# CHECK-NEXT: Total Cycles: 54
+# CHECK-NEXT: Total uOps: 200
+
+# CHECK: Dispatch Width: 15
+# CHECK-NEXT: uOps Per Cycle: 3.70
+# CHECK-NEXT: IPC: 3.70
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 1 0.25 mov x0, x1
+# CHECK-NEXT: 1 1 0.33 cmp x0, #4
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V1UnitB
+# CHECK-NEXT: [0.1] - V1UnitB
+# CHECK-NEXT: [1.0] - V1UnitD
+# CHECK-NEXT: [1.1] - V1UnitD
+# CHECK-NEXT: [2] - V1UnitL2
+# CHECK-NEXT: [3.0] - V1UnitL01
+# CHECK-NEXT: [3.1] - V1UnitL01
+# CHECK-NEXT: [4] - V1UnitM0
+# CHECK-NEXT: [5] - V1UnitM1
+# CHECK-NEXT: [6.0] - V1UnitS
+# CHECK-NEXT: [6.1] - V1UnitS
+# CHECK-NEXT: [7] - V1UnitV0
+# CHECK-NEXT: [8] - V1UnitV1
+# CHECK-NEXT: [9] - V1UnitV2
+# CHECK-NEXT: [10] - V1UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10]
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10] Instructions:
+# CHECK-NEXT: - - - - - - - 0.48 0.50 0.01 0.01 - - - - mov x0, x1
+# CHECK-NEXT: - - - - - - - 0.02 - 0.49 0.49 - - - - cmp x0, #4
+
+# CHECK: Timeline view:
+# CHECK-NEXT: Index 012345
+
+# CHECK: [0,0] DeER . mov x0, x1
+# CHECK-NEXT: [0,1] D=eER. cmp x0, #4
+# CHECK-NEXT: [1,0] DeE-R. mov x0, x1
+# CHECK-NEXT: [1,1] D=eER. cmp x0, #4
+# CHECK-NEXT: [2,0] DeE-R. mov x0, x1
+# CHECK-NEXT: [2,1] D=eER. cmp x0, #4
+# CHECK-NEXT: [3,0] DeE-R. mov x0, x1
+# CHECK-NEXT: [3,1] D==eER cmp x0, #4
+
+# CHECK: Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK: [0] [1] [2] [3]
+# CHECK-NEXT: 0. 4 1.0 1.0 0.8 mov x0, x1
+# CHECK-NEXT: 1. 4 2.3 0.3 0.0 cmp x0, #4
+# CHECK-NEXT: 4 1.6 0.6 0.4 <total>
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index ee8830edeedbbe..980d9a39636ea7 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -962,7 +962,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "extern const MCRegisterDesc " << TargetName
<< "RegDesc[] = { // Descriptors\n";
- OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";
+ OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0, 0 },\n";
// Emit the register descriptors now.
i = 0;
@@ -977,7 +977,8 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
<< DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
<< ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
<< (Offset << RegUnitBits | FirstRU) << ", "
- << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
+ << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << ", " << Reg.Constant
+ << " },\n";
++i;
}
OS << "};\n\n"; // End of register descriptors...
More information about the llvm-commits
mailing list