[llvm] [WIP][RFC] Teach MCA that constant registers do not create dependencies (PR #89387)

Rin Dobrescu via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 03:33:27 PDT 2024


https://github.com/Rin18 updated https://github.com/llvm/llvm-project/pull/89387

>From 7d29ddb340ecae3eaf0ab7382efd26388daae1f7 Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Thu, 18 Apr 2024 13:09:52 +0000
Subject: [PATCH 1/4] Precommit test

---
 .../AArch64/Neoverse/V1-zero-dependency.s     | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s

diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
new file mode 100644
index 00000000000000..ac3b6d55272a8c
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -0,0 +1,77 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v1 --timeline --timeline-max-iterations=4 < %s | FileCheck %s
+
+mov x0, x1
+cmp x0, #4
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      200
+# CHECK-NEXT: Total Cycles:      203
+# CHECK-NEXT: Total uOps:        200
+
+# CHECK:      Dispatch Width:    15
+# CHECK-NEXT: uOps Per Cycle:    0.99
+# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: Block RThroughput: 0.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      1     0.25                        mov	x0, x1
+# CHECK-NEXT:  1      1     0.33                        cmp	x0, #4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0.0] - V1UnitB
+# CHECK-NEXT: [0.1] - V1UnitB
+# CHECK-NEXT: [1.0] - V1UnitD
+# CHECK-NEXT: [1.1] - V1UnitD
+# CHECK-NEXT: [2]   - V1UnitL2
+# CHECK-NEXT: [3.0] - V1UnitL01
+# CHECK-NEXT: [3.1] - V1UnitL01
+# CHECK-NEXT: [4]   - V1UnitM0
+# CHECK-NEXT: [5]   - V1UnitM1
+# CHECK-NEXT: [6.0] - V1UnitS
+# CHECK-NEXT: [6.1] - V1UnitS
+# CHECK-NEXT: [7]   - V1UnitV0
+# CHECK-NEXT: [8]   - V1UnitV1
+# CHECK-NEXT: [9]   - V1UnitV2
+# CHECK-NEXT: [10]  - V1UnitV3
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]   Instructions:
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     mov	x0, x1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     cmp	x0, #4
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeER .    .   mov	x0, x1
+# CHECK-NEXT: [0,1]     D=eER.    .   cmp	x0, #4
+# CHECK-NEXT: [1,0]     D==eER    .   mov	x0, x1
+# CHECK-NEXT: [1,1]     D===eER   .   cmp	x0, #4
+# CHECK-NEXT: [2,0]     D====eER  .   mov	x0, x1
+# CHECK-NEXT: [2,1]     D=====eER .   cmp	x0, #4
+# CHECK-NEXT: [3,0]     D======eER.   mov	x0, x1
+# CHECK-NEXT: [3,1]     D=======eER   cmp	x0, #4
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     4     4.0    0.3    0.0       mov	x0, x1
+# CHECK-NEXT: 1.     4     5.0    0.0    0.0       cmp	x0, #4
+# CHECK-NEXT:        4     4.5    0.1    0.0       <total>

>From f6cc74659d121b64a10fb8c87baa615670c456cb Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Fri, 19 Apr 2024 14:11:37 +0000
Subject: [PATCH 2/4] Add constant register check.

---
 llvm/include/llvm/MC/MCRegisterInfo.h         |  7 ++++
 llvm/lib/MCA/InstrBuilder.cpp                 | 17 +++++++--
 .../AArch64/Neoverse/V1-zero-dependency.s     | 37 +++++++++----------
 llvm/utils/TableGen/RegisterInfoEmitter.cpp   |  3 +-
 4 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index c648ef20fa84c6..4dbd4435a8c719 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -126,6 +126,9 @@ struct MCRegisterDesc {
   /// Index into list with lane mask sequences. The sequence contains a lanemask
   /// for every register unit.
   uint16_t RegUnitLaneMasks;
+
+  // Returns true for constant registers
+  bool Constant;
 };
 
 /// MCRegisterInfo base class - We assume that the target defines a static
@@ -382,6 +385,10 @@ class MCRegisterInfo {
     return RegStrings + get(RegNo).Name;
   }
 
+  bool isConstant(MCRegister RegNo) const {
+    return get(RegNo).Constant && MCRegister::isPhysicalRegister(RegNo.id());
+  }
+
   /// Return the number of registers this target has (useful for
   /// sizing arrays holding per register information)
   unsigned getNumRegs() const {
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 1a82e45763a267..ad558dd7cf858a 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -320,9 +320,9 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
 
   unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
   ID.Writes.resize(TotalDefs + NumVariadicOps);
-  // Iterate over the operands list, and skip non-register operands.
-  // The first NumExplicitDefs register operands are expected to be register
-  // definitions.
+  // Iterate over the operands list, and skip non-register or constant register
+  // operands. The first NumExplicitDefs register operands are expected to be
+  // register definitions.
   unsigned CurrentDef = 0;
   unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
   unsigned i = 0;
@@ -335,6 +335,10 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
       OptionalDefIdx = CurrentDef++;
       continue;
     }
+    if (MRI.isConstant(Op.getReg())) {
+      CurrentDef++;
+      continue;
+    }
 
     WriteDescriptor &Write = ID.Writes[CurrentDef];
     Write.OpIndex = i;
@@ -413,6 +417,8 @@ void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
     const MCOperand &Op = MCI.getOperand(OpIndex);
     if (!Op.isReg())
       continue;
+    if (MRI.isConstant(Op.getReg()))
+      continue;
 
     WriteDescriptor &Write = ID.Writes[CurrentDef];
     Write.OpIndex = OpIndex;
@@ -449,6 +455,9 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
     if (!Op.isReg())
       continue;
 
+    if (MRI.isConstant(Op.getReg()))
+      continue;
+
     ReadDescriptor &Read = ID.Reads[CurrentUse];
     Read.OpIndex = OpIndex;
     Read.UseIndex = I;
@@ -465,6 +474,8 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
     Read.OpIndex = ~I;
     Read.UseIndex = NumExplicitUses + I;
     Read.RegisterID = MCDesc.implicit_uses()[I];
+    if (MRI.isConstant(Read.RegisterID))
+      continue;
     Read.SchedClassID = SchedClassID;
     LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                       << ", UseIndex=" << Read.UseIndex << ", RegisterID="
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
index ac3b6d55272a8c..071329fd00cdd2 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -6,12 +6,12 @@ cmp x0, #4
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      200
-# CHECK-NEXT: Total Cycles:      203
+# CHECK-NEXT: Total Cycles:      54
 # CHECK-NEXT: Total uOps:        200
 
 # CHECK:      Dispatch Width:    15
-# CHECK-NEXT: uOps Per Cycle:    0.99
-# CHECK-NEXT: IPC:               0.99
+# CHECK-NEXT: uOps Per Cycle:    3.70
+# CHECK-NEXT: IPC:               3.70
 # CHECK-NEXT: Block RThroughput: 0.3
 
 # CHECK:      Instruction Info:
@@ -45,25 +45,24 @@ cmp x0, #4
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]   Instructions:
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     1.00    -      -      -      -     mov	x0, x1
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00    -      -      -      -      -     cmp	x0, #4
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.48   0.50   0.01   0.01    -      -      -      -     mov	x0, x1
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.02    -     0.49   0.49    -      -      -      -     cmp	x0, #4
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345
 
-# CHECK:      [0,0]     DeER .    .   mov	x0, x1
-# CHECK-NEXT: [0,1]     D=eER.    .   cmp	x0, #4
-# CHECK-NEXT: [1,0]     D==eER    .   mov	x0, x1
-# CHECK-NEXT: [1,1]     D===eER   .   cmp	x0, #4
-# CHECK-NEXT: [2,0]     D====eER  .   mov	x0, x1
-# CHECK-NEXT: [2,1]     D=====eER .   cmp	x0, #4
-# CHECK-NEXT: [3,0]     D======eER.   mov	x0, x1
-# CHECK-NEXT: [3,1]     D=======eER   cmp	x0, #4
+# CHECK:      [0,0]     DeER .   mov	x0, x1
+# CHECK-NEXT: [0,1]     D=eER.   cmp	x0, #4
+# CHECK-NEXT: [1,0]     DeE-R.   mov	x0, x1
+# CHECK-NEXT: [1,1]     D=eER.   cmp	x0, #4
+# CHECK-NEXT: [2,0]     DeE-R.   mov	x0, x1
+# CHECK-NEXT: [2,1]     D=eER.   cmp	x0, #4
+# CHECK-NEXT: [3,0]     DeE-R.   mov	x0, x1
+# CHECK-NEXT: [3,1]     D==eER   cmp	x0, #4
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -72,6 +71,6 @@ cmp x0, #4
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     4     4.0    0.3    0.0       mov	x0, x1
-# CHECK-NEXT: 1.     4     5.0    0.0    0.0       cmp	x0, #4
-# CHECK-NEXT:        4     4.5    0.1    0.0       <total>
+# CHECK-NEXT: 0.     4     1.0    1.0    0.8       mov	x0, x1
+# CHECK-NEXT: 1.     4     2.3    0.3    0.0       cmp	x0, #4
+# CHECK-NEXT:        4     1.6    0.6    0.4       <total>
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index ee8830edeedbbe..f6f93154c454d3 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -977,7 +977,8 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
        << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
        << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
        << (Offset << RegUnitBits | FirstRU) << ", "
-       << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
+       << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << ", " << Reg.Constant
+       << " },\n";
     ++i;
   }
   OS << "};\n\n"; // End of register descriptors...

>From b9a89f6be46f562b828a8c22a915259015889a75 Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Mon, 22 Apr 2024 15:57:06 +0000
Subject: [PATCH 3/4] Fix test, add check and address comments.

---
 llvm/include/llvm/MC/MCRegisterInfo.h         |  9 ++++---
 llvm/lib/MCA/InstrBuilder.cpp                 |  3 +--
 .../AArch64/HiSilicon/tsv110-forwarding.s     | 24 +++++++++----------
 3 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index 4dbd4435a8c719..af5be9186108af 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -127,8 +127,8 @@ struct MCRegisterDesc {
   /// for every register unit.
   uint16_t RegUnitLaneMasks;
 
-  // Returns true for constant registers
-  bool Constant;
+  // Is true for constant registers.
+  bool IsConstant;
 };
 
 /// MCRegisterInfo base class - We assume that the target defines a static
@@ -385,9 +385,8 @@ class MCRegisterInfo {
     return RegStrings + get(RegNo).Name;
   }
 
-  bool isConstant(MCRegister RegNo) const {
-    return get(RegNo).Constant && MCRegister::isPhysicalRegister(RegNo.id());
-  }
+  /// Returns true if the given register is constant.
+  bool isConstant(MCRegister RegNo) const { return get(RegNo).IsConstant; }
 
   /// Return the number of registers this target has (useful for
   /// sizing arrays holding per register information)
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index ad558dd7cf858a..8a3d653f32e47c 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -454,7 +454,6 @@ void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
     const MCOperand &Op = MCI.getOperand(OpIndex);
     if (!Op.isReg())
       continue;
-
     if (MRI.isConstant(Op.getReg()))
       continue;
 
@@ -760,7 +759,7 @@ InstrBuilder::createInstruction(const MCInst &MCI,
     RegID = WD.isImplicitWrite() ? WD.RegisterID
                                  : MCI.getOperand(WD.OpIndex).getReg();
     // Check if this is a optional definition that references NoReg.
-    if (WD.IsOptionalDef && !RegID) {
+    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
       ++WriteIndex;
       continue;
     }
diff --git a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
index 207822b618396e..b29697ea7972b1 100644
--- a/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
+++ b/llvm/test/tools/llvm-mca/AArch64/HiSilicon/tsv110-forwarding.s
@@ -52,22 +52,22 @@ madd x0, x0, x0, x0
 
 # CHECK:      Iterations:        1
 # CHECK-NEXT: Instructions:      4
-# CHECK-NEXT: Total Cycles:      13
+# CHECK-NEXT: Total Cycles:      12
 # CHECK-NEXT: Total uOps:        4
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.31
-# CHECK-NEXT: IPC:               0.31
+# CHECK-NEXT: uOps Per Cycle:    0.33
+# CHECK-NEXT: IPC:               0.33
 # CHECK-NEXT: Block RThroughput: 4.0
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
+# CHECK-NEXT:                     01
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeER   . .   mul	x0, x1, x2
-# CHECK-NEXT: [0,1]     D=eeeeER  . .   madd	x0, x1, x2, x0
-# CHECK-NEXT: [0,2]     D==eeeeER . .   madd	x0, x1, x2, x0
-# CHECK-NEXT: [0,3]     D======eeeeER   madd	x0, x0, x0, x0
+# CHECK:      [0,0]     DeeeeER   ..   mul	x0, x1, x2
+# CHECK-NEXT: [0,1]     D==eeeeER ..   madd	x0, x1, x2, x0
+# CHECK-NEXT: [0,2]     D=eeeeE-R ..   madd	x0, x1, x2, x0
+# CHECK-NEXT: [0,3]     D=====eeeeER   madd	x0, x0, x0, x0
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -77,7 +77,7 @@ madd x0, x0, x0, x0
 
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       mul	x0, x1, x2
-# CHECK-NEXT: 1.     1     2.0    0.0    0.0       madd	x0, x1, x2, x0
-# CHECK-NEXT: 2.     1     3.0    0.0    0.0       madd	x0, x1, x2, x0
-# CHECK-NEXT: 3.     1     7.0    0.0    0.0       madd	x0, x0, x0, x0
-# CHECK-NEXT:        1     3.3    0.3    0.0       <total>
+# CHECK-NEXT: 1.     1     3.0    3.0    0.0       madd	x0, x1, x2, x0
+# CHECK-NEXT: 2.     1     2.0    2.0    1.0       madd	x0, x1, x2, x0
+# CHECK-NEXT: 3.     1     6.0    0.0    0.0       madd	x0, x0, x0, x0
+# CHECK-NEXT:        1     3.0    1.5    0.3       <total>

>From a7573bc0be7886a62ebc43130d6af831ffbff728 Mon Sep 17 00:00:00 2001
From: Rin Dobrescu <rin.dobrescu at arm.com>
Date: Tue, 23 Apr 2024 10:30:28 +0000
Subject: [PATCH 4/4] Update print line before emitting register descriptors.

---
 llvm/utils/TableGen/RegisterInfoEmitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index f6f93154c454d3..980d9a39636ea7 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -962,7 +962,7 @@ void RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   OS << "extern const MCRegisterDesc " << TargetName
      << "RegDesc[] = { // Descriptors\n";
-  OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";
+  OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0, 0, 0 },\n";
 
   // Emit the register descriptors now.
   i = 0;



More information about the llvm-commits mailing list