[llvm] cc96a82 - [TableGen][SchedModels] Fix read/write variant substitution

Mon Nov 2 06:39:12 PST 2020

Author: Evgeny Leviant
Date: 2020-11-02T17:39:04+03:00
New Revision: cc96a822917c1d95312a1fdb24e1fc0d5fc925b5

URL: https://github.com/llvm/llvm-project/commit/cc96a822917c1d95312a1fdb24e1fc0d5fc925b5
DIFF: https://github.com/llvm/llvm-project/commit/cc96a822917c1d95312a1fdb24e1fc0d5fc925b5.diff

LOG: [TableGen][SchedModels] Fix read/write variant substitution

This patch fixes the case where a sched class has write and read variants
belonging to different processor models.

Differential revision: https://reviews.llvm.org/D89777

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMScheduleA57.td
    llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
    llvm/utils/TableGen/CodeGenSchedule.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index 9a541e1c4331f..b5e745129c0ff 100644

--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -270,7 +270,11 @@ def : ReadAdvance<ReadMUL, 0>;
 // from similar μops, allowing a typical sequence of multiply-accumulate μops
 // to issue one every 1 cycle (sched advance = 2).
 def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
-def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57WriteMLAL : SchedWriteVariant<[
+  SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
+  SchedVar<NoSchedPred,       [A57Write_4cyc_1M]>
+]>;
+
 def A57ReadMLA  : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
 
 def : InstRW<[A57WriteMLA],

diff  --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
index 320a8a10cfa4b..1b98770b758b4 100644
--- a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
@@ -1421,9 +1421,9 @@
 # CHECK-NEXT:  1      3     1.00                        smladeq	r2, r3, r5, r8
 # CHECK-NEXT:  1      3     1.00                        smladxhi	r2, r3, r5, r8
 # CHECK-NEXT:  2      4     2.00                        smlal	r2, r3, r5, r8
-# CHECK-NEXT:  2      4     2.00                        smlals	r2, r3, r5, r8
+# CHECK-NEXT:  4      5     2.00                        smlals	r2, r3, r5, r8
 # CHECK-NEXT:  2      4     2.00                        smlaleq	r2, r3, r5, r8
-# CHECK-NEXT:  2      4     2.00                        smlalshi	r2, r3, r5, r8
+# CHECK-NEXT:  4      5     2.00                        smlalshi	r2, r3, r5, r8
 # CHECK-NEXT:  2      4     2.00                        smlalbb	r3, r1, r9, r0
 # CHECK-NEXT:  2      4     2.00                        smlalbt	r5, r6, r4, r1
 # CHECK-NEXT:  2      4     2.00                        smlaltb	r4, r2, r3, r2
@@ -1634,12 +1634,12 @@
 # CHECK-NEXT:  2      4     2.00                        umaallt	r3, r4, r5, r6
 # CHECK-NEXT:  2      4     2.00                        umlal	r2, r4, r6, r8
 # CHECK-NEXT:  2      4     2.00                        umlalgt	r6, r1, r2, r6
-# CHECK-NEXT:  2      4     2.00                        umlals	r2, r9, r2, r3
-# CHECK-NEXT:  2      4     2.00                        umlalseq	r3, r5, r1, r2
+# CHECK-NEXT:  4      5     2.00                        umlals	r2, r9, r2, r3
+# CHECK-NEXT:  4      5     2.00                        umlalseq	r3, r5, r1, r2
 # CHECK-NEXT:  2      4     2.00                        umull	r2, r4, r6, r8
 # CHECK-NEXT:  2      4     2.00                        umullgt	r6, r1, r2, r6
-# CHECK-NEXT:  2      4     2.00                        umulls	r2, r9, r2, r3
-# CHECK-NEXT:  2      4     2.00                        umullseq	r3, r5, r1, r2
+# CHECK-NEXT:  4      5     2.00                        umulls	r2, r9, r2, r3
+# CHECK-NEXT:  4      5     2.00                        umullseq	r3, r5, r1, r2
 # CHECK-NEXT:  1      2     1.00                        uqadd16	r1, r2, r3
 # CHECK-NEXT:  1      2     1.00                        uqadd16gt	r4, r7, r9
 # CHECK-NEXT:  1      2     1.00                        uqadd8	r3, r4, r8
@@ -1719,7 +1719,7 @@
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1.0]  [1.1]  [2]    [3]    [4]    [5]    [6]
-# CHECK-NEXT: 8.00   133.00 133.00 53.00  522.00 12.00   -      -
+# CHECK-NEXT: 8.00   139.00 139.00 53.00  522.00 12.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1.0]  [1.1]  [2]    [3]    [4]    [5]    [6]    Instructions:
@@ -2285,9 +2285,9 @@
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     smladeq	r2, r3, r5, r8
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     smladxhi	r2, r3, r5, r8
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlal	r2, r3, r5, r8
-# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlals	r2, r3, r5, r8
+# CHECK-NEXT:  -     1.00   1.00    -     2.00    -      -      -     smlals	r2, r3, r5, r8
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlaleq	r2, r3, r5, r8
-# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlalshi	r2, r3, r5, r8
+# CHECK-NEXT:  -     1.00   1.00    -     2.00    -      -      -     smlalshi	r2, r3, r5, r8
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlalbb	r3, r1, r9, r0
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlalbt	r5, r6, r4, r1
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     smlaltb	r4, r2, r3, r2
@@ -2498,12 +2498,12 @@
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umaallt	r3, r4, r5, r6
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umlal	r2, r4, r6, r8
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umlalgt	r6, r1, r2, r6
-# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umlals	r2, r9, r2, r3
-# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umlalseq	r3, r5, r1, r2
+# CHECK-NEXT:  -     1.00   1.00    -     2.00    -      -      -     umlals	r2, r9, r2, r3
+# CHECK-NEXT:  -     1.00   1.00    -     2.00    -      -      -     umlalseq	r3, r5, r1, r2
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umull	r2, r4, r6, r8
 # CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umullgt	r6, r1, r2, r6
-# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umulls	r2, r9, r2, r3
-# CHECK-NEXT:  -      -      -      -     2.00    -      -      -     umullseq	r3, r5, r1, r2
+# CHECK-NEXT:  -     1.00   1.00    -     2.00    -      -      -     umulls	r2, r9, r2, r3
+# CHECK-NEXT:  -     1.00   1.00    -     2.00    -      -      -     umullseq	r3, r5, r1, r2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     uqadd16	r1, r2, r3
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     uqadd16gt	r4, r7, r9
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     uqadd8	r3, r4, r8

diff  --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index be12758b23bea..71fd4ec1e07d8 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -1315,6 +1315,16 @@ struct PredTransition {
   SmallVector<SmallVector<unsigned,4>, 16> WriteSequences;
   SmallVector<SmallVector<unsigned,4>, 16> ReadSequences;
   SmallVector<unsigned, 4> ProcIndices;
+
+  PredTransition() = default;
+  PredTransition(ArrayRef<PredCheck> PT) {
+    PredTerm.assign(PT.begin(), PT.end());
+    ProcIndices.assign(1, 0);
+  }
+  PredTransition(ArrayRef<PredCheck> PT, ArrayRef<unsigned> PIds) {
+    PredTerm.assign(PT.begin(), PT.end());
+    ProcIndices.assign(PIds.begin(), PIds.end());
+  }
 };
 
 // Encapsulate a set of partially constructed transitions.
@@ -1328,7 +1338,8 @@ class PredTransitions {
   PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
 
   void substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
-                                bool IsRead, unsigned StartIdx);
+                                bool IsRead, bool IsForAnyCPU,
+                                unsigned StartIdx);
 
   void substituteVariants(const PredTransition &Trans);
 
@@ -1568,7 +1579,20 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
 // starts. RWSeq must be applied to all transitions between StartIdx and the end
 // of TransVec.
 void PredTransitions::substituteVariantOperand(
-  const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
+    const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
+    unsigned StartIdx) {
+
+  auto CollectAndAddVariants = [&](unsigned TransIdx,
+                                   const CodeGenSchedRW &SchedRW) {
+    // Distribute this partial PredTransition across intersecting variants.
+    // This will push copies of TransVec[TransIdx] on the back of TransVec.
+    std::vector<TransVariant> IntersectingVariants;
+    getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
+    // Now expand each variant on top of its copy of the transition.
+    for (const TransVariant &IV : IntersectingVariants)
+      pushVariant(IV, IsRead);
+    return !IntersectingVariants.empty();
+  };
 
   // Visit each original RW within the current sequence.
   for (SmallVectorImpl<unsigned>::const_iterator
@@ -1577,6 +1601,7 @@ void PredTransitions::substituteVariantOperand(
     // Push this RW on all partial PredTransitions or distribute variants.
     // New PredTransitions may be pushed within this loop which should not be
     // revisited (TransEnd must be loop invariant).
+    bool HasAliases = false, WasPushed = false;
     for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
          TransIdx != TransEnd; ++TransIdx) {
       // In the common case, push RW onto the current operand's sequence.
@@ -1587,17 +1612,22 @@ void PredTransitions::substituteVariantOperand(
           TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
         continue;
       }
-      // Distribute this partial PredTransition across intersecting variants.
-      // This will push a copies of TransVec[TransIdx] on the back of TransVec.
-      std::vector<TransVariant> IntersectingVariants;
-      getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
-      // Now expand each variant on top of its copy of the transition.
-      for (std::vector<TransVariant>::const_iterator
-             IVI = IntersectingVariants.begin(),
-             IVE = IntersectingVariants.end();
-           IVI != IVE; ++IVI) {
-        pushVariant(*IVI, IsRead);
-      }
+      HasAliases = true;
+      WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
+    }
+    if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
+      // If we're here this means that in some sched class:
+      // a) We have a read variant for CPU A
+      // b) We have a write variant for CPU B
+      // c) We don't have a write variant for CPU A
+      // d) We must expand all read/write variants (IsForAnyCPU is true)
+      // e) We couldn't expand SchedRW because TransVec doesn't have
+      //    any transition with a compatible CPU ID.
+      // In such a case we create a new empty transition with zero (AnyCPU)
+      // index.
+      TransVec.emplace_back(TransVec[StartIdx].PredTerm);
+      TransVec.back().ReadSequences.emplace_back();
+      CollectAndAddVariants(TransVec.size() - 1, SchedRW);
     }
   }
 }
@@ -1612,10 +1642,9 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
   // Build up a set of partial results starting at the back of
   // PredTransitions. Remember the first new transition.
   unsigned StartIdx = TransVec.size();
-  TransVec.emplace_back();
-  TransVec.back().PredTerm = Trans.PredTerm;
-  TransVec.back().ProcIndices = Trans.ProcIndices;
+  TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
 
+  bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
   // Visit each original write sequence.
   for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
          WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
@@ -1625,7 +1654,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
            TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
       I->WriteSequences.emplace_back();
     }
-    substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
+    substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
   }
   // Visit each original read sequence.
   for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1636,7 +1665,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
            TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
       I->ReadSequences.emplace_back();
     }
-    substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
+    substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
   }
 }