[llvm] cc96a82 - [TableGen][SchedModels] Fix read/write variant substitution
Evgeny Leviant via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 2 06:39:12 PST 2020
Author: Evgeny Leviant
Date: 2020-11-02T17:39:04+03:00
New Revision: cc96a822917c1d95312a1fdb24e1fc0d5fc925b5
URL: https://github.com/llvm/llvm-project/commit/cc96a822917c1d95312a1fdb24e1fc0d5fc925b5
DIFF: https://github.com/llvm/llvm-project/commit/cc96a822917c1d95312a1fdb24e1fc0d5fc925b5.diff
LOG: [TableGen][SchedModels] Fix read/write variant substitution
This patch fixes the case where a sched class has write and read variants
belonging to different processor models.
Differential revision: https://reviews.llvm.org/D89777
Added:
Modified:
llvm/lib/Target/ARM/ARMScheduleA57.td
llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
llvm/utils/TableGen/CodeGenSchedule.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index 9a541e1c4331f..b5e745129c0ff 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -270,7 +270,11 @@ def : ReadAdvance<ReadMUL, 0>;
// from similar μops, allowing a typical sequence of multiply-accumulate μops
// to issue one every 1 cycle (sched advance = 2).
def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
-def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57WriteMLAL : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
+]>;
+
def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
def : InstRW<[A57WriteMLA],
diff --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
index 320a8a10cfa4b..1b98770b758b4 100644
--- a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
@@ -1421,9 +1421,9 @@
# CHECK-NEXT: 1 3 1.00 smladeq r2, r3, r5, r8
# CHECK-NEXT: 1 3 1.00 smladxhi r2, r3, r5, r8
# CHECK-NEXT: 2 4 2.00 smlal r2, r3, r5, r8
-# CHECK-NEXT: 2 4 2.00 smlals r2, r3, r5, r8
+# CHECK-NEXT: 4 5 2.00 smlals r2, r3, r5, r8
# CHECK-NEXT: 2 4 2.00 smlaleq r2, r3, r5, r8
-# CHECK-NEXT: 2 4 2.00 smlalshi r2, r3, r5, r8
+# CHECK-NEXT: 4 5 2.00 smlalshi r2, r3, r5, r8
# CHECK-NEXT: 2 4 2.00 smlalbb r3, r1, r9, r0
# CHECK-NEXT: 2 4 2.00 smlalbt r5, r6, r4, r1
# CHECK-NEXT: 2 4 2.00 smlaltb r4, r2, r3, r2
@@ -1634,12 +1634,12 @@
# CHECK-NEXT: 2 4 2.00 umaallt r3, r4, r5, r6
# CHECK-NEXT: 2 4 2.00 umlal r2, r4, r6, r8
# CHECK-NEXT: 2 4 2.00 umlalgt r6, r1, r2, r6
-# CHECK-NEXT: 2 4 2.00 umlals r2, r9, r2, r3
-# CHECK-NEXT: 2 4 2.00 umlalseq r3, r5, r1, r2
+# CHECK-NEXT: 4 5 2.00 umlals r2, r9, r2, r3
+# CHECK-NEXT: 4 5 2.00 umlalseq r3, r5, r1, r2
# CHECK-NEXT: 2 4 2.00 umull r2, r4, r6, r8
# CHECK-NEXT: 2 4 2.00 umullgt r6, r1, r2, r6
-# CHECK-NEXT: 2 4 2.00 umulls r2, r9, r2, r3
-# CHECK-NEXT: 2 4 2.00 umullseq r3, r5, r1, r2
+# CHECK-NEXT: 4 5 2.00 umulls r2, r9, r2, r3
+# CHECK-NEXT: 4 5 2.00 umullseq r3, r5, r1, r2
# CHECK-NEXT: 1 2 1.00 uqadd16 r1, r2, r3
# CHECK-NEXT: 1 2 1.00 uqadd16gt r4, r7, r9
# CHECK-NEXT: 1 2 1.00 uqadd8 r3, r4, r8
@@ -1719,7 +1719,7 @@
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6]
-# CHECK-NEXT: 8.00 133.00 133.00 53.00 522.00 12.00 - -
+# CHECK-NEXT: 8.00 139.00 139.00 53.00 522.00 12.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions:
@@ -2285,9 +2285,9 @@
# CHECK-NEXT: - - - - 1.00 - - - smladeq r2, r3, r5, r8
# CHECK-NEXT: - - - - 1.00 - - - smladxhi r2, r3, r5, r8
# CHECK-NEXT: - - - - 2.00 - - - smlal r2, r3, r5, r8
-# CHECK-NEXT: - - - - 2.00 - - - smlals r2, r3, r5, r8
+# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlals r2, r3, r5, r8
# CHECK-NEXT: - - - - 2.00 - - - smlaleq r2, r3, r5, r8
-# CHECK-NEXT: - - - - 2.00 - - - smlalshi r2, r3, r5, r8
+# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlalshi r2, r3, r5, r8
# CHECK-NEXT: - - - - 2.00 - - - smlalbb r3, r1, r9, r0
# CHECK-NEXT: - - - - 2.00 - - - smlalbt r5, r6, r4, r1
# CHECK-NEXT: - - - - 2.00 - - - smlaltb r4, r2, r3, r2
@@ -2498,12 +2498,12 @@
# CHECK-NEXT: - - - - 2.00 - - - umaallt r3, r4, r5, r6
# CHECK-NEXT: - - - - 2.00 - - - umlal r2, r4, r6, r8
# CHECK-NEXT: - - - - 2.00 - - - umlalgt r6, r1, r2, r6
-# CHECK-NEXT: - - - - 2.00 - - - umlals r2, r9, r2, r3
-# CHECK-NEXT: - - - - 2.00 - - - umlalseq r3, r5, r1, r2
+# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlals r2, r9, r2, r3
+# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlalseq r3, r5, r1, r2
# CHECK-NEXT: - - - - 2.00 - - - umull r2, r4, r6, r8
# CHECK-NEXT: - - - - 2.00 - - - umullgt r6, r1, r2, r6
-# CHECK-NEXT: - - - - 2.00 - - - umulls r2, r9, r2, r3
-# CHECK-NEXT: - - - - 2.00 - - - umullseq r3, r5, r1, r2
+# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umulls r2, r9, r2, r3
+# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umullseq r3, r5, r1, r2
# CHECK-NEXT: - - - - 1.00 - - - uqadd16 r1, r2, r3
# CHECK-NEXT: - - - - 1.00 - - - uqadd16gt r4, r7, r9
# CHECK-NEXT: - - - - 1.00 - - - uqadd8 r3, r4, r8
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index be12758b23bea..71fd4ec1e07d8 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -1315,6 +1315,16 @@ struct PredTransition {
SmallVector<SmallVector<unsigned,4>, 16> WriteSequences;
SmallVector<SmallVector<unsigned,4>, 16> ReadSequences;
SmallVector<unsigned, 4> ProcIndices;
+
+ PredTransition() = default;
+ PredTransition(ArrayRef<PredCheck> PT) {
+ PredTerm.assign(PT.begin(), PT.end());
+ ProcIndices.assign(1, 0);
+ }
+ PredTransition(ArrayRef<PredCheck> PT, ArrayRef<unsigned> PIds) {
+ PredTerm.assign(PT.begin(), PT.end());
+ ProcIndices.assign(PIds.begin(), PIds.end());
+ }
};
// Encapsulate a set of partially constructed transitions.
@@ -1328,7 +1338,8 @@ class PredTransitions {
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
void substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
- bool IsRead, unsigned StartIdx);
+ bool IsRead, bool IsForAnyCPU,
+ unsigned StartIdx);
void substituteVariants(const PredTransition &Trans);
@@ -1568,7 +1579,20 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
// starts. RWSeq must be applied to all transitions between StartIdx and the end
// of TransVec.
void PredTransitions::substituteVariantOperand(
- const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
+ const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
+ unsigned StartIdx) {
+
+ auto CollectAndAddVariants = [&](unsigned TransIdx,
+ const CodeGenSchedRW &SchedRW) {
+ // Distribute this partial PredTransition across intersecting variants.
+ // This will push copies of TransVec[TransIdx] onto the back of TransVec.
+ std::vector<TransVariant> IntersectingVariants;
+ getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
+ // Now expand each variant on top of its copy of the transition.
+ for (const TransVariant &IV : IntersectingVariants)
+ pushVariant(IV, IsRead);
+ return !IntersectingVariants.empty();
+ };
// Visit each original RW within the current sequence.
for (SmallVectorImpl<unsigned>::const_iterator
@@ -1577,6 +1601,7 @@ void PredTransitions::substituteVariantOperand(
// Push this RW on all partial PredTransitions or distribute variants.
// New PredTransitions may be pushed within this loop which should not be
// revisited (TransEnd must be loop invariant).
+ bool HasAliases = false, WasPushed = false;
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
TransIdx != TransEnd; ++TransIdx) {
// In the common case, push RW onto the current operand's sequence.
@@ -1587,17 +1612,22 @@ void PredTransitions::substituteVariantOperand(
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
continue;
}
- // Distribute this partial PredTransition across intersecting variants.
- // This will push a copies of TransVec[TransIdx] on the back of TransVec.
- std::vector<TransVariant> IntersectingVariants;
- getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
- // Now expand each variant on top of its copy of the transition.
- for (std::vector<TransVariant>::const_iterator
- IVI = IntersectingVariants.begin(),
- IVE = IntersectingVariants.end();
- IVI != IVE; ++IVI) {
- pushVariant(*IVI, IsRead);
- }
+ HasAliases = true;
+ WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
+ }
+ if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
+ // If we're here, this means that in some sched class:
+ // a) We have a read variant for CPU A
+ // b) We have a write variant for CPU B
+ // c) We don't have a write variant for CPU A
+ // d) We must expand all read/write variants (IsForAnyCPU is true)
+ // e) We couldn't expand SchedRW because TransVec doesn't have
+ // any transition with a compatible CPU ID.
+ // In such a case we create a new empty transition with a zero (AnyCPU)
+ // index.
+ TransVec.emplace_back(TransVec[StartIdx].PredTerm);
+ TransVec.back().ReadSequences.emplace_back();
+ CollectAndAddVariants(TransVec.size() - 1, SchedRW);
}
}
}
@@ -1612,10 +1642,9 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
// Build up a set of partial results starting at the back of
// PredTransitions. Remember the first new transition.
unsigned StartIdx = TransVec.size();
- TransVec.emplace_back();
- TransVec.back().PredTerm = Trans.PredTerm;
- TransVec.back().ProcIndices = Trans.ProcIndices;
+ TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
+ bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
// Visit each original write sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
@@ -1625,7 +1654,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->WriteSequences.emplace_back();
}
- substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
+ substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
}
// Visit each original read sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1636,7 +1665,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->ReadSequences.emplace_back();
}
- substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
+ substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
}
}
More information about the llvm-commits
mailing list