[llvm] 385f59f - [llvm-mca] Teach MCA constant registers do not create dependencies (#89387)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 3 02:30:26 PDT 2024


Author: Rin Dobrescu
Date: 2024-05-03T10:30:22+01:00
New Revision: 385f59f9f570b77bf8bd636ba2f65a08a7227499

URL: https://github.com/llvm/llvm-project/commit/385f59f9f570b77bf8bd636ba2f65a08a7227499
DIFF: https://github.com/llvm/llvm-project/commit/385f59f9f570b77bf8bd636ba2f65a08a7227499.diff

LOG: [llvm-mca] Teach MCA constant registers do not create dependencies (#89387)

Constant registers like the zero registers XZR and WZR are treated as
any other register by LLVM-MCA. This can create non existent dependency
chains.
Currently there is no method in MCA to query if a register is constant.
This patch fixes the issue by adding a bool Constant
variable to MCRegisterDesc that is true for constant registers. Since
constant registers do not create dependencies, it makes sense to add
this check to MCA.

Added: 
    llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s

Modified: 
    llvm/include/llvm/MC/MCRegisterInfo.h
    llvm/lib/MCA/InstrBuilder.cpp
    llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
    llvm/utils/TableGen/RegisterInfoEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index c648ef20fa84c6..af5be9186108af 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -126,6 +126,9 @@ struct MCRegisterDesc {
   /// Index into list with lane mask sequences. The sequence contains a lanemask
   /// for every register unit.
   uint16_t RegUnitLaneMasks;
+
+  // Is true for constant registers.
+  bool IsConstant;
 };
 
 /// MCRegisterInfo base class - We assume that the target defines a static
@@ -382,6 +385,9 @@ class MCRegisterInfo {
     return RegStrings + get(RegNo).Name;
   }
 
+  /// Returns true if the given register is constant.
+  bool isConstant(MCRegister RegNo) const { return get(RegNo).IsConstant; }
+
   /// Return the number of registers this target has (useful for
   /// sizing arrays holding per register information)
   unsigned getNumRegs() const {

diff  --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 2e3ebe3d9073bd..bcf065c5669188 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -320,9 +320,9 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
 
   unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
   ID.Writes.resize(TotalDefs + NumVariadicOps);
-  // Iterate over the operands list, and skip non-register operands.
-  // The first NumExplicitDefs register operands are expected to be register
-  // definitions.
+  // Iterate over the operands list, and skip non-register or constant register
+  // operands. The first NumExplicitDefs register operands are expected to be
+  // register definitions.
   unsigned CurrentDef = 0;
   unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
   unsigned i = 0;
@@ -335,6 +335,10 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
       OptionalDefIdx = CurrentDef++;
       continue;
     }
+    if (MRI.isConstant(Op.getReg())) {
+      CurrentDef++;
+      continue;
+    }
 
     WriteDescriptor &Write = ID.Writes[CurrentDef];
     Write.OpIndex = i;
@@ -413,6 +417,8 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
     const MCOperand &Op = MCI.getOperand(OpIndex);
     if (!Op.isReg())
       continue;
+    if (MRI.isConstant(Op.getReg()))
+      continue;
 
     WriteDescriptor &Write = ID.Writes[CurrentDef];
     Write.OpIndex = OpIndex;
@@ -448,6 +454,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
     const MCOperand &Op = MCI.getOperand(OpIndex);
     if (!Op.isReg())
       continue;
+    if (MRI.isConstant(Op.getReg()))
+      continue;
 
     ReadDescriptor &Read = ID.Reads[CurrentUse];
     Read.OpIndex = OpIndex;
@@ -465,6 +473,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
     Read.OpIndex = ~I;
     Read.UseIndex = NumExplicitUses + I;
     Read.RegisterID = MCDesc.implicit_uses()[I];
+    if (MRI.isConstant(Read.RegisterID))
+      continue;
     Read.SchedClassID = SchedClassID;
     LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                       << ", UseIndex=" << Read.UseIndex << ", RegisterID="
@@ -747,8 +757,9 @@ InstrBuilder::createInstruction(const MCInst &MCI,
   for (const WriteDescriptor &WD : D.Writes) {
     RegID = WD.isImplicitWrite() ? WD.RegisterID
                                  : MCI.getOperand(WD.OpIndex).getReg();
-    // Check if this is a optional definition that references NoReg.
-    if (WD.IsOptionalDef && !RegID) {
+    // Check if this is a optional definition that references NoReg or a write
+    // to a constant register.
+    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
       ++WriteIndex;
       continue;
     }

diff  --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
index 207822b618396e..b29697ea7972b1 100644
--- a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
@@ -52,22 +52,22 @@ madd x0, x0, x0, x0
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      4
-# CHECK-NEXT: Total Cycles:      13
+# CHECK-NEXT: Total Cycles:      12
 # CHECK-NEXT: Total uOps:        4
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.31
-# CHECK-NEXT: IPC:               0.31
+# CHECK-NEXT: uOps Per Cycle:    0.33
+# CHECK-NEXT: IPC:               0.33
 # CHECK-NEXT: Block RThroughput: 4.0
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
+# CHECK-NEXT:                     01
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeER   . .   mul	x0, x1, x2
-# CHECK-NEXT: [0,1]     D=eeeeER  . .   madd	x0, x1, x2, x0
-# CHECK-NEXT: [0,2]     D==eeeeER . .   madd	x0, x1, x2, x0
-# CHECK-NEXT: [0,3]     D======eeeeER   madd	x0, x0, x0, x0
+# CHECK:      [0,0]     DeeeeER   ..   mul	x0, x1, x2
+# CHECK-NEXT: [0,1]     D==eeeeER ..   madd	x0, x1, x2, x0
+# CHECK-NEXT: [0,2]     D=eeeeE-R ..   madd	x0, x1, x2, x0
+# CHECK-NEXT: [0,3]     D=====eeeeER   madd	x0, x0, x0, x0
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -77,7 +77,7 @@ madd x0, x0, x0, x0
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       mul	x0, x1, x2
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       madd	x0, x1, x2, x0
-# CHECK-NEXT: 2.     1     3.0    0.0    0.0       madd	x0, x1, x2, x0
-# CHECK-NEXT: 3.     1     7.0    0.0    0.0       madd	x0, x0, x0, x0
-# CHECK-NEXT:        1     3.3    0.3    0.0       <total>
+# CHECK-NEXT: 1.     1     3.0    3.0    0.0       madd	x0, x1, x2, x0
+# CHECK-NEXT: 2.     1     2.0    2.0    1.0       madd	x0, x1, x2, x0
+# CHECK-NEXT: 3.     1     6.0    0.0    0.0       madd	x0, x0, x0, x0
+# CHECK-NEXT:        1     3.0    1.5    0.3       <total>

diff  --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
new file mode 100644
index 00000000000000..071329fd00cdd2
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -0,0 +1,76 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 --timeline --timeline-max-iterations=4 < %s | FileCheck %s
+
+mov x0, x1
+cmp x0, #4
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      200
+# CHECK-NEXT: Total Cycles:      54
+# CHECK-NEXT: Total uOps:        200
+
+# CHECK:      Dispatch Width:    15
+# CHECK-NEXT: uOps Per Cycle:    3.70
+# CHECK-NEXT: IPC:               3.70
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        mov	x0, x1
+# CHECK-NEXT:  1      1     0.33                        cmp	x0, #4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - V1UnitB
+# CHECK-NEXT: [0.1] - V1UnitB
+# CHECK-NEXT: [1.0] - V1UnitD
+# CHECK-NEXT: [1.1] - V1UnitD
+# CHECK-NEXT: [2]   - V1UnitL2
+# CHECK-NEXT: [3.0] - V1UnitL01
+# CHECK-NEXT: [3.1] - V1UnitL01
+# CHECK-NEXT: [4]   - V1UnitM0
+# CHECK-NEXT: [5]   - V1UnitM1
+# CHECK-NEXT: [6.0] - V1UnitS
+# CHECK-NEXT: [6.1] - V1UnitS
+# CHECK-NEXT: [7]   - V1UnitV0
+# CHECK-NEXT: [8]   - V1UnitV1
+# CHECK-NEXT: [9]   - V1UnitV2
+# CHECK-NEXT: [10]  - V1UnitV3
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]   Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.48   0.50   0.01   0.01    -      -      -      -     mov	x0, x1
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.02    -     0.49   0.49    -      -      -      -     cmp	x0, #4
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     012345
+
+# CHECK:      [0,0]     DeER .   mov	x0, x1
+# CHECK-NEXT: [0,1]     D=eER.   cmp	x0, #4
+# CHECK-NEXT: [1,0]     DeE-R.   mov	x0, x1
+# CHECK-NEXT: [1,1]     D=eER.   cmp	x0, #4
+# CHECK-NEXT: [2,0]     DeE-R.   mov	x0, x1
+# CHECK-NEXT: [2,1]     D=eER.   cmp	x0, #4
+# CHECK-NEXT: [3,0]     DeE-R.   mov	x0, x1
+# CHECK-NEXT: [3,1]     D==eER   cmp	x0, #4
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     4     1.0    1.0    0.8       mov	x0, x1
+# CHECK-NEXT: 1.     4     2.3    0.3    0.0       cmp	x0, #4
+# CHECK-NEXT:        4     1.6    0.6    0.4       <total>

diff  --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index ee8830edeedbbe..980d9a39636ea7 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -962,7 +962,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   OS << "extern const MCRegisterDesc " << TargetName
      << "RegDesc[] = { // Descriptors\n";
-  OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";
+  OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0, 0, 0 },\n";
 
   // Emit the register descriptors now.
   i = 0;
@@ -977,7 +977,8 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
        << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
        << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
        << (Offset << RegUnitBits | FirstRU) << ", "
-       << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
+       << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << ", " << Reg.Constant
+       << " },\n";
     ++i;
   }
   OS << "};\n\n"; // End of register descriptors...


        


More information about the llvm-commits mailing list