[llvm-branch-commits] [llvm] 993eaf2 - Recommit [TableGen][SchedModels] Fix read/write variant substitution
Evgeny Leviant via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Dec 4 10:55:16 PST 2020
Author: Evgeny Leviant
Date: 2020-12-04T21:50:34+03:00
New Revision: 993eaf2d69d8beb97e4695cbd919b927ed1cfe86
URL: https://github.com/llvm/llvm-project/commit/993eaf2d69d8beb97e4695cbd919b927ed1cfe86
DIFF: https://github.com/llvm/llvm-project/commit/993eaf2d69d8beb97e4695cbd919b927ed1cfe86.diff
LOG: Recommit [TableGen][SchedModels] Fix read/write variant substitution
Original commit rG112b3cb6ba49 introduced non-determinism in subtarget
generator due to iteration over DenseMap. New patch fixes this changing
ProcModelMapTy from DenseMap to std::map.
Added:
llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir
Modified:
llvm/lib/Target/ARM/ARMScheduleA57.td
llvm/utils/TableGen/CodeGenSchedule.cpp
llvm/utils/TableGen/CodeGenSchedule.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index be8591935810..0c610a4839f8 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -183,11 +183,6 @@ class A57BranchForm<SchedWriteRes non_br> :
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
// Why more complex instruction uses more simple pipeline?
// May be an error in doc.
-def A57WriteALUsi : SchedWriteVariant<[
- // lsl #2, lsl #1, or lsr #1.
- SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>,
- SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
-]>;
def A57WriteALUsr : SchedWriteVariant<[
SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
@@ -200,7 +195,7 @@ def A57ReadALUsr : SchedReadVariant<[
SchedVar<IsPredicatedPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>
]>;
-def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
def : SchedAlias<WriteALUsr, A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr, A57ReadALUsr>;
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir b/llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir
new file mode 100644
index 000000000000..03241c48045d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-mla.mir
@@ -0,0 +1,34 @@
+# RUN: llc -mcpu=cortex-a57 -mtriple=thumb -enable-misched -run-pass=machine-scheduler -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s
+
+# CHECK-LABEL: ********** MI Scheduling **********
+# CHECK: %[[RES:[0-9]+]]:rgpr = t2MLA
+# CHECK-NEXT: # preds left
+# CHECK-NEXT: # succs left
+# CHECK-NEXT: # rdefs left
+# CHECK-NEXT: Latency : 3
+# CHECK-NEXT: Depth
+# CHECK-NEXT: Height
+# CHECK-NEXT: Predecessors:
+# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
+# CHECK-NEXT: SU({{.*}}): Out Latency=
+# CHECK-NEXT: SU({{.*}}): Data Latency=1 Reg=
+# CHECK-NEXT: Successors:
+# CHECK-NEXT: SU([[SMLA_SU:[0-9]+]]): Data Latency=1 Reg=%[[RES]]
+# CHECK-NEXT: Pressure Diff
+# CHECK-NEXT: Single Issue : false;
+# CHECK-NEXT: SU([[SMLA_SU]]): {{.*}} = t2SMLAL %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr, %{{[0-9]+}}:rgpr(tied-def 0), %[[RES]]:rgpr(tied-def 1), 14, $noreg
+
+name: test_smlal_forwarding
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r1, $r3, $r4, $r5, $r6
+ %1:rgpr = COPY $r1
+ %3:rgpr = COPY $r3
+ %4:rgpr = COPY $r4
+ %5:rgpr = COPY $r5
+ %6:rgpr = COPY $r6
+ %3:rgpr = t2MLA %4:rgpr, %1:rgpr, %4:rgpr, 14, $noreg
+ %6:rgpr, %5:rgpr = t2SMLAL %5:rgpr, %6:rgpr, %4:rgpr, %3:rgpr, 14, $noreg
+ $r0 = COPY %6:rgpr
+ BX_RET 14, $noreg, implicit $r0
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 82ca35f37e8b..50ee9462d1d1 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -283,7 +283,7 @@ static APInt constructOperandMask(ArrayRef<int64_t> Indices) {
static void
processSTIPredicate(STIPredicateFunction &Fn,
- const DenseMap<Record *, unsigned> &ProcModelMap) {
+ const ProcModelMapTy &ProcModelMap) {
DenseMap<const Record *, unsigned> Opcode2Index;
using OpcodeMapPair = std::pair<const Record *, OpcodeInfo>;
std::vector<OpcodeMapPair> OpcodeMappings;
@@ -1338,8 +1338,7 @@ class PredTransitions {
PredTransitions(CodeGenSchedModels &sm): SchedModels(sm) {}
bool substituteVariantOperand(const SmallVectorImpl<unsigned> &RWSeq,
- bool IsRead, bool IsForAnyCPU,
- unsigned StartIdx);
+ bool IsRead, unsigned StartIdx);
bool substituteVariants(const PredTransition &Trans);
@@ -1413,29 +1412,6 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
return false;
}
-static bool hasAliasedVariants(const CodeGenSchedRW &RW,
- CodeGenSchedModels &SchedModels) {
- if (RW.HasVariants)
- return true;
-
- for (Record *Alias : RW.Aliases) {
- const CodeGenSchedRW &AliasRW =
- SchedModels.getSchedRW(Alias->getValueAsDef("AliasRW"));
- if (AliasRW.HasVariants)
- return true;
- if (AliasRW.IsSequence) {
- IdxVec ExpandedRWs;
- SchedModels.expandRWSequence(AliasRW.Index, ExpandedRWs, AliasRW.IsRead);
- for (unsigned SI : ExpandedRWs) {
- if (hasAliasedVariants(SchedModels.getSchedRW(SI, AliasRW.IsRead),
- SchedModels))
- return true;
- }
- }
- }
- return false;
-}
-
static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants,
ArrayRef<unsigned> ProcIndices) {
std::vector<Record *> Preds;
@@ -1613,21 +1589,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
// starts. RWSeq must be applied to all transitions between StartIdx and the end
// of TransVec.
bool PredTransitions::substituteVariantOperand(
- const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
- unsigned StartIdx) {
-
- auto CollectAndAddVariants = [&](unsigned TransIdx,
- const CodeGenSchedRW &SchedRW) {
- // Distribute this partial PredTransition across intersecting variants.
- // This will push a copies of TransVec[TransIdx] on the back of TransVec.
- std::vector<TransVariant> IntersectingVariants;
- getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
- // Now expand each variant on top of its copy of the transition.
- for (const TransVariant &IV : IntersectingVariants)
- pushVariant(IV, IsRead);
- return !IntersectingVariants.empty();
- };
-
+ const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, unsigned StartIdx) {
bool Subst = false;
// Visit each original RW within the current sequence.
for (SmallVectorImpl<unsigned>::const_iterator
@@ -1636,35 +1598,24 @@ bool PredTransitions::substituteVariantOperand(
// Push this RW on all partial PredTransitions or distribute variants.
// New PredTransitions may be pushed within this loop which should not be
// revisited (TransEnd must be loop invariant).
- bool HasAliases = false, WasPushed = false;
for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
TransIdx != TransEnd; ++TransIdx) {
- // In the common case, push RW onto the current operand's sequence.
- if (!hasAliasedVariants(SchedRW, SchedModels)) {
+ // Distribute this partial PredTransition across intersecting variants.
+ // This will push a copies of TransVec[TransIdx] on the back of TransVec.
+ std::vector<TransVariant> IntersectingVariants;
+ getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
+ // Now expand each variant on top of its copy of the transition.
+ for (const TransVariant &IV : IntersectingVariants)
+ pushVariant(IV, IsRead);
+ if (IntersectingVariants.empty()) {
if (IsRead)
TransVec[TransIdx].ReadSequences.back().push_back(*RWI);
else
TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
continue;
+ } else {
+ Subst = true;
}
- HasAliases = true;
- WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
- Subst |= WasPushed;
- }
- if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
- // If we're here this means that in some sched class:
- // a) We have read variant for CPU A
- // b) We have write variant for CPU B
- // b) We don't have write variant for CPU A
- // d) We must expand all read/write variants (IsForAnyCPU is true)
- // e) We couldn't expand SchedRW because TransVec doesn't have
- // any transition with compatible CPU ID.
- // In such case we create new empty transition with zero (AnyCPU)
- // index.
- TransVec.reserve(TransVec.size() + 1);
- TransVec.emplace_back(TransVec[StartIdx].PredTerm);
- TransVec.back().ReadSequences.emplace_back();
- Subst |= CollectAndAddVariants(TransVec.size() - 1, SchedRW);
}
}
return Subst;
@@ -1683,7 +1634,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
bool Subst = false;
TransVec.emplace_back(Trans.PredTerm, Trans.ProcIndices);
- bool IsForAnyCPU = llvm::count(Trans.ProcIndices, 0);
+ assert(!llvm::count(Trans.ProcIndices, 0));
// Visit each original write sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
@@ -1693,8 +1644,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->WriteSequences.emplace_back();
}
- Subst |=
- substituteVariantOperand(*WSI, /*IsRead=*/false, IsForAnyCPU, StartIdx);
+ Subst |= substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
}
// Visit each original read sequence.
for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
@@ -1705,8 +1655,7 @@ bool PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.begin() + StartIdx, E = TransVec.end(); I != E; ++I) {
I->ReadSequences.emplace_back();
}
- Subst |=
- substituteVariantOperand(*RSI, /*IsRead=*/true, IsForAnyCPU, StartIdx);
+ Subst |= substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx);
}
return Subst;
}
@@ -1745,6 +1694,10 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
// requires creating a new SchedClass.
for (ArrayRef<PredTransition>::iterator
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
+ // Variant expansion (substituteVariants) may create unconditional
+ // transitions. We don't need to build sched classes for them.
+ if (I->PredTerm.empty())
+ continue;
IdxVec OperWritesVariant, OperReadsVariant;
addSequences(SchedModels, I->WriteSequences, OperWritesVariant, false);
addSequences(SchedModels, I->ReadSequences, OperReadsVariant, true);
@@ -1777,6 +1730,26 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
}
}
+std::vector<unsigned> CodeGenSchedModels::getAllProcIndices() const {
+ std::vector<unsigned> ProcIdVec;
+ for (const auto &PM : ProcModelMap)
+ if (PM.second != 0)
+ ProcIdVec.push_back(PM.second);
+ return ProcIdVec;
+}
+
+static std::vector<PredTransition>
+makePerProcessorTransitions(const PredTransition &Trans,
+ ArrayRef<unsigned> ProcIndices) {
+ std::vector<PredTransition> PerCpuTransVec;
+ for (unsigned ProcId : ProcIndices) {
+ assert(ProcId != 0);
+ PerCpuTransVec.push_back(Trans);
+ PerCpuTransVec.back().ProcIndices.assign(1, ProcId);
+ }
+ return PerCpuTransVec;
+}
+
// Create new SchedClasses for the given ReadWrite list. If any of the
// ReadWrites refers to a SchedVariant, create a new SchedClass for each variant
// of the ReadWrite list, following Aliases if necessary.
@@ -1812,6 +1785,10 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
}
LLVM_DEBUG(dbgs() << '\n');
+ LastTransitions = makePerProcessorTransitions(
+ LastTransitions[0], llvm::count(ProcIndices, 0)
+ ? ArrayRef<unsigned>(getAllProcIndices())
+ : ProcIndices);
// Collect all PredTransitions for individual operands.
// Iterate until no variant writes remain.
bool SubstitutedAny;
@@ -1823,9 +1800,6 @@ void CodeGenSchedModels::inferFromRW(ArrayRef<unsigned> OperWrites,
LLVM_DEBUG(Transitions.dump());
LastTransitions.swap(Transitions.TransVec);
} while (SubstitutedAny);
- // If the first transition has no variants, nothing to do.
- if (LastTransitions[0].PredTerm.empty())
- return;
// WARNING: We are about to mutate the SchedClasses vector. Do not refer to
// OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.
diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h
index 62a359e0888c..311adafdf977 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/llvm/utils/TableGen/CodeGenSchedule.h
@@ -21,6 +21,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
+#include <map>
namespace llvm {
@@ -409,6 +410,8 @@ class STIPredicateFunction {
ArrayRef<OpcodeGroup> getGroups() const { return Groups; }
};
+using ProcModelMapTy = std::map<const Record*, unsigned>;
+
/// Top level container for machine model data.
class CodeGenSchedModels {
RecordKeeper &Records;
@@ -421,7 +424,6 @@ class CodeGenSchedModels {
std::vector<CodeGenProcModel> ProcModels;
// Map Processor's MachineModel or ProcItin to a CodeGenProcModel index.
- using ProcModelMapTy = DenseMap<Record*, unsigned>;
ProcModelMapTy ProcModelMap;
// Per-operand SchedReadWrite types.
@@ -443,6 +445,7 @@ class CodeGenSchedModels {
InstClassMapTy InstrClassMap;
std::vector<STIPredicateFunction> STIPredicates;
+ std::vector<unsigned> getAllProcIndices() const;
public:
CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT);
More information about the llvm-branch-commits
mailing list