[llvm] 50bd686 - Add support for branch forms of ALU instructions to Cortex-A57 model
Evgeny Leviant via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 24 00:43:59 PST 2020
Author: Evgeny Leviant
Date: 2020-11-24T11:43:51+03:00
New Revision: 50bd686695ac2ca25996be7994808f93a2b753c5
URL: https://github.com/llvm/llvm-project/commit/50bd686695ac2ca25996be7994808f93a2b753c5
DIFF: https://github.com/llvm/llvm-project/commit/50bd686695ac2ca25996be7994808f93a2b753c5.diff
LOG: Add support for branch forms of ALU instructions to Cortex-A57 model
This patch fixes the scheduling of ALU instructions which modify the pc
register. It also fixes the computation of mutually exclusive predicates so
that sequences of variants are properly expanded.
Differential revision: https://reviews.llvm.org/D91266
Added:
Modified:
llvm/lib/Target/ARM/ARM.td
llvm/lib/Target/ARM/ARMSchedule.td
llvm/lib/Target/ARM/ARMScheduleA57.td
llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
llvm/utils/TableGen/CodeGenSchedule.cpp
llvm/utils/TableGen/SubtargetEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 94e147b29920..0b61bb7a673e 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -885,14 +885,6 @@ def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
-
-//===----------------------------------------------------------------------===//
-// ARM schedules.
-//===----------------------------------------------------------------------===//
-//
-include "ARMPredicates.td"
-include "ARMSchedule.td"
-
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -901,6 +893,13 @@ include "ARMRegisterInfo.td"
include "ARMRegisterBanks.td"
include "ARMCallingConv.td"
+//===----------------------------------------------------------------------===//
+// ARM schedules.
+//===----------------------------------------------------------------------===//
+//
+include "ARMPredicates.td"
+include "ARMSchedule.td"
+
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td
index 5838e7278c01..53a2a6fec51e 100644
--- a/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/llvm/lib/Target/ARM/ARMSchedule.td
@@ -189,6 +189,23 @@ let FunctionMapper = "ARM_AM::getAM3Op" in {
// LDM, base reg in list
def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;
+class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;
+
+class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
+ list<int> rcl, SchedWriteRes wr> :
+ SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
+ let Latency = !add(wr.Latency, lat);
+ let ResourceCycles = !listconcat(wr.ResourceCycles, rcl);
+ let NumMicroOps = !add(wr.NumMicroOps, uops);
+ SchedWriteRes BaseWr = wr;
+}
+
+class CheckBranchForm<int n, BranchWriteRes br> :
+ SchedWriteVariant<[
+ SchedVar<IsRegPCPred<n>, [br]>,
+ SchedVar<NoSchedPred, [br.BaseWr]>
+ ]>;
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index b5e745129c0f..be8591935810 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -173,22 +173,28 @@ def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+// Check branch forms of ALU ops:
+// check whether register operand 0 is PC;
+// if so, add 2 cycles of latency, 1 uop and 1 resource cycle on A57UnitB.
+class A57BranchForm<SchedWriteRes non_br> :
+ BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
+
// shift by register, conditional or unconditional
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
// Why more complex instruction uses more simple pipeline?
// May be an error in doc.
def A57WriteALUsi : SchedWriteVariant<[
// lsl #2, lsl #1, or lsr #1.
- SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
- SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57WriteALUsr : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
- SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57WriteALUSsr : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
- SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+ SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+ SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57ReadALUsr : SchedReadVariant<[
SchedVar<IsPredicatedPred, [ReadDefault]>,
@@ -830,7 +836,6 @@ def A57WriteVLDMuncond : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]>
]> { let Variadic=1; }
@@ -851,7 +856,6 @@ def A57WriteVLDMcond : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]>
]> { let Variadic=1; }
@@ -879,7 +883,6 @@ def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]>
]> { let Variadic=1; }
@@ -900,7 +903,6 @@ def A57WriteVLDMcond_UPD : SchedWriteVariant<[
SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
- SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]>
]> { let Variadic=1; }
@@ -1486,7 +1488,7 @@ def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
// -----------------------------------------------------------------------------
// Common definitions
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : SchedAlias<WriteALU, A57Write_1cyc_1I>;
+def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
diff --git a/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
index ffbf89a39d2c..531b10bc5cfd 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -36,13 +36,16 @@ def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
let ResourceCycles = [20]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
-def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
-def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1;
+ let ResourceCycles = [1]; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2;
+ let ResourceCycles = [1]; }
def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
-def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2;
+ let ResourceCycles = [1]; }
def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
let ResourceCycles = [32]; }
def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
diff --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
index 1d6755720b22..2257e453e0a8 100644
--- a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
@@ -874,7 +874,7 @@
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 adc r1, r2, #15
-# CHECK-NEXT: 1 1 0.50 adc pc, r2, #16
+# CHECK-NEXT: 2 3 1.00 adc pc, r2, #16
# CHECK-NEXT: 1 1 0.50 adc r1, r2, #240
# CHECK-NEXT: 1 1 0.50 adc r1, r2, #3840
# CHECK-NEXT: 1 1 0.50 adc r1, r2, #61440
@@ -890,9 +890,9 @@
# CHECK-NEXT: 1 1 0.50 adcseq r1, r2, #3840
# CHECK-NEXT: 1 1 0.50 adceq r1, r2, #3840
# CHECK-NEXT: 1 1 0.50 adc r4, r5, r6
-# CHECK-NEXT: 1 1 0.50 adc pc, r5, r6
+# CHECK-NEXT: 2 3 1.00 adc pc, r5, r6
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsl #1
-# CHECK-NEXT: 1 2 1.00 adc pc, r5, r6, lsl #4
+# CHECK-NEXT: 2 4 1.00 adc pc, r5, r6, lsl #4
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsl #31
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsr #1
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, lsr #31
@@ -901,7 +901,7 @@
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, asr #31
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, asr #32
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, ror #1
-# CHECK-NEXT: 1 2 1.00 adc pc, r5, r6, ror #2
+# CHECK-NEXT: 2 4 1.00 adc pc, r5, r6, ror #2
# CHECK-NEXT: 1 2 1.00 adc r4, r5, r6, ror #31
# CHECK-NEXT: 1 2 1.00 adc r6, r7, r8, lsl r9
# CHECK-NEXT: 1 2 1.00 adc r6, r7, r8, lsr r9
@@ -954,10 +954,10 @@
# CHECK-NEXT: 1 1 0.50 adds r7, r8, #-2147483638
# CHECK-NEXT: 1 1 0.50 adds r7, r8, #40, #2
# CHECK-NEXT: 1 1 0.50 adr r2, #3
-# CHECK-NEXT: 1 1 0.50 and pc, pc, #8
+# CHECK-NEXT: 2 3 1.00 and pc, pc, #8
# CHECK-NEXT: 1 1 0.50 sub r2, pc, #3
# CHECK-NEXT: 1 1 0.50 sub r1, pc, #0
-# CHECK-NEXT: 1 1 0.50 sub pc, r2, #8
+# CHECK-NEXT: 2 3 1.00 sub pc, r2, #8
# CHECK-NEXT: 1 1 0.50 sub r1, pc, #301989888
# CHECK-NEXT: 1 1 0.50 adr r1, #301989888
# CHECK-NEXT: 1 1 0.50 and r10, r1, #15
@@ -1005,7 +1005,7 @@
# CHECK-NEXT: 1 2 1.00 bic r6, r7, r8, ror r2
# CHECK-NEXT: 1 2 1.00 bic r10, r1, r6, rrx
# CHECK-NEXT: 1 1 0.50 bic r1, r1, #15
-# CHECK-NEXT: 1 1 0.50 bic pc, r1, #15
+# CHECK-NEXT: 2 3 1.00 bic pc, r1, #15
# CHECK-NEXT: 1 1 0.50 bic r10, r10, r1
# CHECK-NEXT: 1 2 1.00 bic r10, r10, r1, lsl #10
# CHECK-NEXT: 1 2 1.00 bic r10, r10, r1, lsr #10
@@ -1102,7 +1102,7 @@
# CHECK-NEXT: 1 1 0.50 eor r7, r8, #-2147483638
# CHECK-NEXT: 1 1 0.50 eor r7, r8, #40, #2
# CHECK-NEXT: 1 1 0.50 eor r4, r5, r6
-# CHECK-NEXT: 1 1 0.50 eor pc, r5, r6
+# CHECK-NEXT: 2 3 1.00 eor pc, r5, r6
# CHECK-NEXT: 1 2 1.00 eor r4, r5, r6, lsl #5
# CHECK-NEXT: 1 2 1.00 eor r4, r5, r6, lsr #5
# CHECK-NEXT: 1 2 1.00 eor r4, r5, r6, lsr #5
@@ -1746,12 +1746,12 @@
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6]
-# CHECK-NEXT: 8.00 144.50 144.50 53.00 524.00 12.00 - -
+# CHECK-NEXT: 16.00 144.50 144.50 53.00 524.00 12.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #15
-# CHECK-NEXT: - 0.50 0.50 - - - - - adc pc, r2, #16
+# CHECK-NEXT: 1.00 0.50 0.50 - - - - - adc pc, r2, #16
# CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #240
# CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #3840
# CHECK-NEXT: - 0.50 0.50 - - - - - adc r1, r2, #61440
@@ -1767,9 +1767,9 @@
# CHECK-NEXT: - 0.50 0.50 - - - - - adcseq r1, r2, #3840
# CHECK-NEXT: - 0.50 0.50 - - - - - adceq r1, r2, #3840
# CHECK-NEXT: - 0.50 0.50 - - - - - adc r4, r5, r6
-# CHECK-NEXT: - 0.50 0.50 - - - - - adc pc, r5, r6
+# CHECK-NEXT: 1.00 0.50 0.50 - - - - - adc pc, r5, r6
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsl #1
-# CHECK-NEXT: - - - - 1.00 - - - adc pc, r5, r6, lsl #4
+# CHECK-NEXT: 1.00 - - - 1.00 - - - adc pc, r5, r6, lsl #4
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsl #31
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsr #1
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, lsr #31
@@ -1778,7 +1778,7 @@
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, asr #31
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, asr #32
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, ror #1
-# CHECK-NEXT: - - - - 1.00 - - - adc pc, r5, r6, ror #2
+# CHECK-NEXT: 1.00 - - - 1.00 - - - adc pc, r5, r6, ror #2
# CHECK-NEXT: - - - - 1.00 - - - adc r4, r5, r6, ror #31
# CHECK-NEXT: - - - - 1.00 - - - adc r6, r7, r8, lsl r9
# CHECK-NEXT: - - - - 1.00 - - - adc r6, r7, r8, lsr r9
@@ -1831,10 +1831,10 @@
# CHECK-NEXT: - 0.50 0.50 - - - - - adds r7, r8, #-2147483638
# CHECK-NEXT: - 0.50 0.50 - - - - - adds r7, r8, #40, #2
# CHECK-NEXT: - 0.50 0.50 - - - - - adr r2, #3
-# CHECK-NEXT: - 0.50 0.50 - - - - - and pc, pc, #8
+# CHECK-NEXT: 1.00 0.50 0.50 - - - - - and pc, pc, #8
# CHECK-NEXT: - 0.50 0.50 - - - - - sub r2, pc, #3
# CHECK-NEXT: - 0.50 0.50 - - - - - sub r1, pc, #0
-# CHECK-NEXT: - 0.50 0.50 - - - - - sub pc, r2, #8
+# CHECK-NEXT: 1.00 0.50 0.50 - - - - - sub pc, r2, #8
# CHECK-NEXT: - 0.50 0.50 - - - - - sub r1, pc, #301989888
# CHECK-NEXT: - 0.50 0.50 - - - - - adr r1, #301989888
# CHECK-NEXT: - 0.50 0.50 - - - - - and r10, r1, #15
@@ -1882,7 +1882,7 @@
# CHECK-NEXT: - - - - 1.00 - - - bic r6, r7, r8, ror r2
# CHECK-NEXT: - - - - 1.00 - - - bic r10, r1, r6, rrx
# CHECK-NEXT: - 0.50 0.50 - - - - - bic r1, r1, #15
-# CHECK-NEXT: - 0.50 0.50 - - - - - bic pc, r1, #15
+# CHECK-NEXT: 1.00 0.50 0.50 - - - - - bic pc, r1, #15
# CHECK-NEXT: - 0.50 0.50 - - - - - bic r10, r10, r1
# CHECK-NEXT: - - - - 1.00 - - - bic r10, r10, r1, lsl #10
# CHECK-NEXT: - - - - 1.00 - - - bic r10, r10, r1, lsr #10
@@ -1979,7 +1979,7 @@
# CHECK-NEXT: - 0.50 0.50 - - - - - eor r7, r8, #-2147483638
# CHECK-NEXT: - 0.50 0.50 - - - - - eor r7, r8, #40, #2
# CHECK-NEXT: - 0.50 0.50 - - - - - eor r4, r5, r6
-# CHECK-NEXT: - 0.50 0.50 - - - - - eor pc, r5, r6
+# CHECK-NEXT: 1.00 0.50 0.50 - - - - - eor pc, r5, r6
# CHECK-NEXT: - - - - 1.00 - - - eor r4, r5, r6, lsl #5
# CHECK-NEXT: - - - - 1.00 - - - eor r4, r5, r6, lsr #5
# CHECK-NEXT: - - - - 1.00 - - - eor r4, r5, r6, lsr #5
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 6fe106e7a04e..369a3849999f 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -1348,7 +1348,8 @@ class PredTransitions {
#endif
private:
- bool mutuallyExclusive(Record *PredDef, ArrayRef<PredCheck> Term);
+ bool mutuallyExclusive(Record *PredDef, ArrayRef<Record *> Preds,
+ ArrayRef<PredCheck> Term);
void getIntersectingVariants(
const CodeGenSchedRW &SchedRW, unsigned TransIdx,
std::vector<TransVariant> &IntersectingVariants);
@@ -1367,6 +1368,7 @@ class PredTransitions {
// are always checked in the order they are defined in the .td file. Later
// conditions implicitly negate any prior condition.
bool PredTransitions::mutuallyExclusive(Record *PredDef,
+ ArrayRef<Record *> Preds,
ArrayRef<PredCheck> Term) {
for (const PredCheck &PC: Term) {
if (PC.Predicate == PredDef)
@@ -1377,8 +1379,36 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants");
if (any_of(Variants, [PredDef](const Record *R) {
return R->getValueAsDef("Predicate") == PredDef;
- }))
+ })) {
+ // To check whether PredDef is mutually exclusive with PC, we also need to
+ // check that PC.Predicate is exclusive with all predicates from the variant
+ // we're expanding. Consider the following RW sequence with two variants
+ // (1 & 2), where A, B and C are predicates from the corresponding SchedVars:
+ //
+ // 1:A/B - 2:C/B
+ //
+ // Here C is not mutually exclusive with variant (1), because A doesn't
+ // exist in variant (2). This means we have possible transitions from A
+ // to C and from A to B, and fully expanded sequence would look like:
+ //
+ // if (A & C) return ...;
+ // if (A & B) return ...;
+ // if (B) return ...;
+ //
+ // Now let's consider another sequence:
+ //
+ // 1:A/B - 2:A/B
+ //
+ // Here A in variant (2) is mutually exclusive with variant (1), because
+ // A also exists in (2). This means A->B transition is impossible and
+ // expanded sequence would look like:
+ //
+ // if (A) return ...;
+ // if (B) return ...;
+ if (!count(Preds, PC.Predicate))
+ continue;
return true;
+ }
}
return false;
}
@@ -1422,6 +1452,15 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions,
return false;
}
+static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants) {
+ std::vector<Record *> Preds;
+ for (auto &Variant : Variants) {
+ assert(Variant.VarOrSeqDef->isSubClassOf("SchedVar"));
+ Preds.push_back(Variant.VarOrSeqDef->getValueAsDef("Predicate"));
+ }
+ return Preds;
+}
+
// Populate IntersectingVariants with any variants or aliased sequences of the
// given SchedRW whose processor indices and predicates are not mutually
// exclusive with the given transition.
@@ -1468,6 +1507,7 @@ void PredTransitions::getIntersectingVariants(
if (AliasProcIdx == 0)
GenericRW = true;
}
+ std::vector<Record *> AllPreds = getAllPredicates(Variants);
for (TransVariant &Variant : Variants) {
// Don't expand variants if the processor models don't intersect.
// A zero processor index means any processor.
@@ -1486,11 +1526,10 @@ void PredTransitions::getIntersectingVariants(
" Ensure only one SchedAlias exists per RW.");
}
}
- if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) {
- Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
- if (mutuallyExclusive(PredDef, TransVec[TransIdx].PredTerm))
- continue;
- }
+ Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
+ if (mutuallyExclusive(PredDef, AllPreds, TransVec[TransIdx].PredTerm))
+ continue;
+
if (IntersectingVariants.empty()) {
// The first variant builds on the existing transition.
Variant.TransVecIdx = TransIdx;
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 615ca9116047..4635dde159ca 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1446,20 +1446,20 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
OS << Buffer;
}
+static bool isTruePredicate(const Record *Rec) {
+ return Rec->isSubClassOf("MCSchedPredicate") &&
+ Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
+}
+
static void emitPredicates(const CodeGenSchedTransition &T,
const CodeGenSchedClass &SC, PredicateExpander &PE,
raw_ostream &OS) {
std::string Buffer;
raw_string_ostream SS(Buffer);
- auto IsTruePredicate = [](const Record *Rec) {
- return Rec->isSubClassOf("MCSchedPredicate") &&
- Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
- };
-
// If not all predicates are MCTrue, then we need an if-stmt.
unsigned NumNonTruePreds =
- T.PredTerm.size() - count_if(T.PredTerm, IsTruePredicate);
+ T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate);
SS.indent(PE.getIndentLevel() * 2);
@@ -1471,7 +1471,7 @@ static void emitPredicates(const CodeGenSchedTransition &T,
for (const Record *Rec : T.PredTerm) {
// Skip predicates that evaluate to "true".
- if (IsTruePredicate(Rec))
+ if (isTruePredicate(Rec))
continue;
if (FirstNonTruePredicate) {
@@ -1559,6 +1559,11 @@ static void collectProcessorIndices(const CodeGenSchedClass &SC,
}
}
+static bool isAlwaysTrue(const CodeGenSchedTransition &T) {
+ return llvm::all_of(T.PredTerm,
+ [](const Record *R) { return isTruePredicate(R); });
+}
+
void SubtargetEmitter::emitSchedModelHelpersImpl(
raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
IdxVec VariantClasses;
@@ -1601,6 +1606,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
}
// Now emit transitions associated with processor PI.
+ const CodeGenSchedTransition *FinalT = nullptr;
for (const CodeGenSchedTransition &T : SC.Transitions) {
if (PI != 0 && !count(T.ProcIndices, PI))
continue;
@@ -1615,9 +1621,17 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
if (OnlyExpandMCInstPredicates && !hasMCSchedPredicates(T))
continue;
+ // If a transition is folded to 'return X', it should be the last one.
+ if (isAlwaysTrue(T)) {
+ FinalT = &T;
+ continue;
+ }
PE.setIndentLevel(3);
emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
}
+ if (FinalT)
+ emitPredicates(*FinalT, SchedModels.getSchedClass(FinalT->ToClassIdx),
+ PE, OS);
OS << " }\n";
More information about the llvm-commits
mailing list