[llvm] 50bd686 - Add support for branch forms of ALU instructions to Cortex-A57 model

Evgeny Leviant via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 24 00:43:59 PST 2020


Author: Evgeny Leviant
Date: 2020-11-24T11:43:51+03:00
New Revision: 50bd686695ac2ca25996be7994808f93a2b753c5

URL: https://github.com/llvm/llvm-project/commit/50bd686695ac2ca25996be7994808f93a2b753c5
DIFF: https://github.com/llvm/llvm-project/commit/50bd686695ac2ca25996be7994808f93a2b753c5.diff

LOG: Add support for branch forms of ALU instructions to Cortex-A57 model

This patch fixes the scheduling of ALU instructions that modify the pc
register. It also fixes the computation of mutually exclusive predicates so
that sequences of variants are expanded properly.

Differential revision: https://reviews.llvm.org/D91266

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARM.td
    llvm/lib/Target/ARM/ARMSchedule.td
    llvm/lib/Target/ARM/ARMScheduleA57.td
    llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
    llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
    llvm/utils/TableGen/CodeGenSchedule.cpp
    llvm/utils/TableGen/SubtargetEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 94e147b29920..0b61bb7a673e 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -885,14 +885,6 @@ def ARMv6j   : Architecture<"armv6j",      "ARMv7a",   [ARMv6]>;
 def ARMv7k   : Architecture<"armv7k",      "ARMv7a",   [ARMv7a]>;
 def ARMv7s   : Architecture<"armv7s",      "ARMv7a",   [ARMv7a]>;
 
-
-//===----------------------------------------------------------------------===//
-// ARM schedules.
-//===----------------------------------------------------------------------===//
-//
-include "ARMPredicates.td"
-include "ARMSchedule.td"
-
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
@@ -901,6 +893,13 @@ include "ARMRegisterInfo.td"
 include "ARMRegisterBanks.td"
 include "ARMCallingConv.td"
 
+//===----------------------------------------------------------------------===//
+// ARM schedules.
+//===----------------------------------------------------------------------===//
+//
+include "ARMPredicates.td"
+include "ARMSchedule.td"
+
 //===----------------------------------------------------------------------===//
 // Instruction Descriptions
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td
index 5838e7278c01..53a2a6fec51e 100644
--- a/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/llvm/lib/Target/ARM/ARMSchedule.td
@@ -189,6 +189,23 @@ let FunctionMapper = "ARM_AM::getAM3Op" in {
 // LDM, base reg in list
 def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;
 
+class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;
+
+class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
+                     list<int> rcl, SchedWriteRes wr> :
+  SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
+  let Latency = !add(wr.Latency, lat);
+  let ResourceCycles = !listconcat(wr.ResourceCycles, rcl);
+  let NumMicroOps = !add(wr.NumMicroOps, uops);
+  SchedWriteRes BaseWr = wr;
+}
+
+class CheckBranchForm<int n, BranchWriteRes br> :
+  SchedWriteVariant<[
+    SchedVar<IsRegPCPred<n>, [br]>,
+    SchedVar<NoSchedPred,    [br.BaseWr]>
+  ]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for ARM
 //

diff  --git a/llvm/lib/Target/ARM/ARMScheduleA57.td b/llvm/lib/Target/ARM/ARMScheduleA57.td
index b5e745129c0f..be8591935810 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -173,22 +173,28 @@ def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
 
 def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
 
+// Check branch forms of ALU ops:
+// check reg 0 for ARM_AM::PC
+// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB
+class A57BranchForm<SchedWriteRes non_br> :
+  BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
+
 // shift by register, conditional or unconditional
 // TODO: according to the doc, conditional uses I0/I1, unconditional uses M
 // Why more complex instruction uses more simple pipeline?
 // May be an error in doc.
 def A57WriteALUsi : SchedWriteVariant<[
   // lsl #2, lsl #1, or lsr #1.
-  SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
-  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
+  SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>,
+  SchedVar<NoSchedPred,      [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
 ]>;
 def A57WriteALUsr : SchedWriteVariant<[
-  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
-  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
+  SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+  SchedVar<NoSchedPred,      [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
 ]>;
 def A57WriteALUSsr : SchedWriteVariant<[
-  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
-  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
+  SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
+  SchedVar<NoSchedPred,      [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
 ]>;
 def A57ReadALUsr : SchedReadVariant<[
   SchedVar<IsPredicatedPred, [ReadDefault]>,
@@ -830,7 +836,6 @@ def A57WriteVLDMuncond : SchedWriteVariant<[
   SchedVar<A57LMAddrPred5,  A57VLDMOpsListUncond.Writes[0-9]>,
   SchedVar<A57LMAddrPred6,  A57VLDMOpsListUncond.Writes[0-11]>,
   SchedVar<A57LMAddrPred7,  A57VLDMOpsListUncond.Writes[0-13]>,
-  SchedVar<A57LMAddrPred8,  A57VLDMOpsListUncond.Writes[0-15]>,
   SchedVar<NoSchedPred,     A57VLDMOpsListUncond.Writes[0-15]>
 ]> { let Variadic=1; }
 
@@ -851,7 +856,6 @@ def A57WriteVLDMcond : SchedWriteVariant<[
   SchedVar<A57LMAddrPred5,  A57VLDMOpsListCond.Writes[0-9]>,
   SchedVar<A57LMAddrPred6,  A57VLDMOpsListCond.Writes[0-11]>,
   SchedVar<A57LMAddrPred7,  A57VLDMOpsListCond.Writes[0-13]>,
-  SchedVar<A57LMAddrPred8,  A57VLDMOpsListCond.Writes[0-15]>,
   SchedVar<NoSchedPred,     A57VLDMOpsListCond.Writes[0-15]>
 ]> { let Variadic=1; }
 
@@ -879,7 +883,6 @@ def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
   SchedVar<A57LMAddrPred5,  A57VLDMOpsListUncond_Upd.Writes[0-9]>,
   SchedVar<A57LMAddrPred6,  A57VLDMOpsListUncond_Upd.Writes[0-11]>,
   SchedVar<A57LMAddrPred7,  A57VLDMOpsListUncond_Upd.Writes[0-13]>,
-  SchedVar<A57LMAddrPred8,  A57VLDMOpsListUncond_Upd.Writes[0-15]>,
   SchedVar<NoSchedPred,     A57VLDMOpsListUncond_Upd.Writes[0-15]>
 ]> { let Variadic=1; }
 
@@ -900,7 +903,6 @@ def A57WriteVLDMcond_UPD : SchedWriteVariant<[
   SchedVar<A57LMAddrPred5,  A57VLDMOpsListCond_Upd.Writes[0-9]>,
   SchedVar<A57LMAddrPred6,  A57VLDMOpsListCond_Upd.Writes[0-11]>,
   SchedVar<A57LMAddrPred7,  A57VLDMOpsListCond_Upd.Writes[0-13]>,
-  SchedVar<A57LMAddrPred8,  A57VLDMOpsListCond_Upd.Writes[0-15]>,
   SchedVar<NoSchedPred,     A57VLDMOpsListCond_Upd.Writes[0-15]>
 ]> { let Variadic=1; }
 
@@ -1486,7 +1488,7 @@ def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
 // -----------------------------------------------------------------------------
 // Common definitions
 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : SchedAlias<WriteALU, A57Write_1cyc_1I>;
+def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;
 
 def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
 def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;

diff  --git a/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td b/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
index ffbf89a39d2c..531b10bc5cfd 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -36,13 +36,16 @@ def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
 def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
                                                     let ResourceCycles = [20]; }
 def A57Write_1cyc_1B  : SchedWriteRes<[A57UnitB]> { let Latency = 1;  }
-def A57Write_1cyc_1I  : SchedWriteRes<[A57UnitI]> { let Latency = 1;  }
-def A57Write_2cyc_1I  : SchedWriteRes<[A57UnitI]> { let Latency = 2;  }
+def A57Write_1cyc_1I  : SchedWriteRes<[A57UnitI]> { let Latency = 1;
+                                                    let ResourceCycles = [1]; }
+def A57Write_2cyc_1I  : SchedWriteRes<[A57UnitI]> { let Latency = 2;
+                                                    let ResourceCycles = [1]; }
 def A57Write_3cyc_1I  : SchedWriteRes<[A57UnitI]> { let Latency = 3;  }
 def A57Write_1cyc_1S  : SchedWriteRes<[A57UnitS]> { let Latency = 1;  }
 def A57Write_2cyc_1S  : SchedWriteRes<[A57UnitS]> { let Latency = 2;  }
 def A57Write_3cyc_1S  : SchedWriteRes<[A57UnitS]> { let Latency = 3;  }
-def A57Write_2cyc_1M  : SchedWriteRes<[A57UnitM]> { let Latency = 2;  }
+def A57Write_2cyc_1M  : SchedWriteRes<[A57UnitM]> { let Latency = 2;
+                                                    let ResourceCycles = [1]; }
 def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
                                                     let ResourceCycles = [32]; }
 def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;

diff  --git a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
index 1d6755720b22..2257e453e0a8 100644
--- a/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s
@@ -874,7 +874,7 @@
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      1     0.50                        adc	r1, r2, #15
-# CHECK-NEXT:  1      1     0.50                        adc	pc, r2, #16
+# CHECK-NEXT:  2      3     1.00                        adc	pc, r2, #16
 # CHECK-NEXT:  1      1     0.50                        adc	r1, r2, #240
 # CHECK-NEXT:  1      1     0.50                        adc	r1, r2, #3840
 # CHECK-NEXT:  1      1     0.50                        adc	r1, r2, #61440
@@ -890,9 +890,9 @@
 # CHECK-NEXT:  1      1     0.50                        adcseq	r1, r2, #3840
 # CHECK-NEXT:  1      1     0.50                        adceq	r1, r2, #3840
 # CHECK-NEXT:  1      1     0.50                        adc	r4, r5, r6
-# CHECK-NEXT:  1      1     0.50                        adc	pc, r5, r6
+# CHECK-NEXT:  2      3     1.00                        adc	pc, r5, r6
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, lsl #1
-# CHECK-NEXT:  1      2     1.00                        adc	pc, r5, r6, lsl #4
+# CHECK-NEXT:  2      4     1.00                        adc	pc, r5, r6, lsl #4
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, lsl #31
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, lsr #1
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, lsr #31
@@ -901,7 +901,7 @@
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, asr #31
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, asr #32
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, ror #1
-# CHECK-NEXT:  1      2     1.00                        adc	pc, r5, r6, ror #2
+# CHECK-NEXT:  2      4     1.00                        adc	pc, r5, r6, ror #2
 # CHECK-NEXT:  1      2     1.00                        adc	r4, r5, r6, ror #31
 # CHECK-NEXT:  1      2     1.00                        adc	r6, r7, r8, lsl r9
 # CHECK-NEXT:  1      2     1.00                        adc	r6, r7, r8, lsr r9
@@ -954,10 +954,10 @@
 # CHECK-NEXT:  1      1     0.50                        adds	r7, r8, #-2147483638
 # CHECK-NEXT:  1      1     0.50                        adds	r7, r8, #40, #2
 # CHECK-NEXT:  1      1     0.50                        adr	r2, #3
-# CHECK-NEXT:  1      1     0.50                        and	pc, pc, #8
+# CHECK-NEXT:  2      3     1.00                        and	pc, pc, #8
 # CHECK-NEXT:  1      1     0.50                        sub	r2, pc, #3
 # CHECK-NEXT:  1      1     0.50                        sub	r1, pc, #0
-# CHECK-NEXT:  1      1     0.50                        sub	pc, r2, #8
+# CHECK-NEXT:  2      3     1.00                        sub	pc, r2, #8
 # CHECK-NEXT:  1      1     0.50                        sub	r1, pc, #301989888
 # CHECK-NEXT:  1      1     0.50                        adr	r1, #301989888
 # CHECK-NEXT:  1      1     0.50                        and	r10, r1, #15
@@ -1005,7 +1005,7 @@
 # CHECK-NEXT:  1      2     1.00                        bic	r6, r7, r8, ror r2
 # CHECK-NEXT:  1      2     1.00                        bic	r10, r1, r6, rrx
 # CHECK-NEXT:  1      1     0.50                        bic	r1, r1, #15
-# CHECK-NEXT:  1      1     0.50                        bic	pc, r1, #15
+# CHECK-NEXT:  2      3     1.00                        bic	pc, r1, #15
 # CHECK-NEXT:  1      1     0.50                        bic	r10, r10, r1
 # CHECK-NEXT:  1      2     1.00                        bic	r10, r10, r1, lsl #10
 # CHECK-NEXT:  1      2     1.00                        bic	r10, r10, r1, lsr #10
@@ -1102,7 +1102,7 @@
 # CHECK-NEXT:  1      1     0.50                        eor	r7, r8, #-2147483638
 # CHECK-NEXT:  1      1     0.50                        eor	r7, r8, #40, #2
 # CHECK-NEXT:  1      1     0.50                        eor	r4, r5, r6
-# CHECK-NEXT:  1      1     0.50                        eor	pc, r5, r6
+# CHECK-NEXT:  2      3     1.00                        eor	pc, r5, r6
 # CHECK-NEXT:  1      2     1.00                        eor	r4, r5, r6, lsl #5
 # CHECK-NEXT:  1      2     1.00                        eor	r4, r5, r6, lsr #5
 # CHECK-NEXT:  1      2     1.00                        eor	r4, r5, r6, lsr #5
@@ -1746,12 +1746,12 @@
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1.0]  [1.1]  [2]    [3]    [4]    [5]    [6]
-# CHECK-NEXT: 8.00   144.50 144.50 53.00  524.00 12.00   -      -
+# CHECK-NEXT: 16.00  144.50 144.50 53.00  524.00 12.00   -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1.0]  [1.1]  [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	r1, r2, #15
-# CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	pc, r2, #16
+# CHECK-NEXT: 1.00   0.50   0.50    -      -      -      -      -     adc	pc, r2, #16
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	r1, r2, #240
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	r1, r2, #3840
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	r1, r2, #61440
@@ -1767,9 +1767,9 @@
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adcseq	r1, r2, #3840
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adceq	r1, r2, #3840
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	r4, r5, r6
-# CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adc	pc, r5, r6
+# CHECK-NEXT: 1.00   0.50   0.50    -      -      -      -      -     adc	pc, r5, r6
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, lsl #1
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	pc, r5, r6, lsl #4
+# CHECK-NEXT: 1.00    -      -      -     1.00    -      -      -     adc	pc, r5, r6, lsl #4
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, lsl #31
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, lsr #1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, lsr #31
@@ -1778,7 +1778,7 @@
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, asr #31
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, asr #32
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, ror #1
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	pc, r5, r6, ror #2
+# CHECK-NEXT: 1.00    -      -      -     1.00    -      -      -     adc	pc, r5, r6, ror #2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r4, r5, r6, ror #31
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r6, r7, r8, lsl r9
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     adc	r6, r7, r8, lsr r9
@@ -1831,10 +1831,10 @@
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adds	r7, r8, #-2147483638
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adds	r7, r8, #40, #2
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adr	r2, #3
-# CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     and	pc, pc, #8
+# CHECK-NEXT: 1.00   0.50   0.50    -      -      -      -      -     and	pc, pc, #8
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     sub	r2, pc, #3
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     sub	r1, pc, #0
-# CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     sub	pc, r2, #8
+# CHECK-NEXT: 1.00   0.50   0.50    -      -      -      -      -     sub	pc, r2, #8
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     sub	r1, pc, #301989888
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     adr	r1, #301989888
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     and	r10, r1, #15
@@ -1882,7 +1882,7 @@
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     bic	r6, r7, r8, ror r2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     bic	r10, r1, r6, rrx
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     bic	r1, r1, #15
-# CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     bic	pc, r1, #15
+# CHECK-NEXT: 1.00   0.50   0.50    -      -      -      -      -     bic	pc, r1, #15
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     bic	r10, r10, r1
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     bic	r10, r10, r1, lsl #10
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     bic	r10, r10, r1, lsr #10
@@ -1979,7 +1979,7 @@
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     eor	r7, r8, #-2147483638
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     eor	r7, r8, #40, #2
 # CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     eor	r4, r5, r6
-# CHECK-NEXT:  -     0.50   0.50    -      -      -      -      -     eor	pc, r5, r6
+# CHECK-NEXT: 1.00   0.50   0.50    -      -      -      -      -     eor	pc, r5, r6
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     eor	r4, r5, r6, lsl #5
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     eor	r4, r5, r6, lsr #5
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     eor	r4, r5, r6, lsr #5

diff  --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 6fe106e7a04e..369a3849999f 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -1348,7 +1348,8 @@ class PredTransitions {
 #endif
 
 private:
-  bool mutuallyExclusive(Record *PredDef, ArrayRef<PredCheck> Term);
+  bool mutuallyExclusive(Record *PredDef, ArrayRef<Record *> Preds,
+                         ArrayRef<PredCheck> Term);
   void getIntersectingVariants(
     const CodeGenSchedRW &SchedRW, unsigned TransIdx,
     std::vector<TransVariant> &IntersectingVariants);
@@ -1367,6 +1368,7 @@ class PredTransitions {
 // are always checked in the order they are defined in the .td file. Later
 // conditions implicitly negate any prior condition.
 bool PredTransitions::mutuallyExclusive(Record *PredDef,
+                                        ArrayRef<Record *> Preds,
                                         ArrayRef<PredCheck> Term) {
   for (const PredCheck &PC: Term) {
     if (PC.Predicate == PredDef)
@@ -1377,8 +1379,36 @@ bool PredTransitions::mutuallyExclusive(Record *PredDef,
     RecVec Variants = SchedRW.TheDef->getValueAsListOfDefs("Variants");
     if (any_of(Variants, [PredDef](const Record *R) {
           return R->getValueAsDef("Predicate") == PredDef;
-        }))
+        })) {
+      // To check if PredDef is mutually exclusive with PC we also need to
+      // check that PC.Predicate is exclusive with all predicates from variant
+      // we're expanding. Consider following RW sequence with two variants
+      // (1 & 2), where A, B and C are predicates from corresponding SchedVars:
+      //
+      // 1:A/B - 2:C/B
+      //
+      // Here C is not mutually exclusive with variant (1), because A doesn't
+      // exist in variant (2). This means we have possible transitions from A
+      // to C and from A to B, and fully expanded sequence would look like:
+      //
+      // if (A & C) return ...;
+      // if (A & B) return ...;
+      // if (B) return ...;
+      //
+      // Now let's consider another sequence:
+      //
+      // 1:A/B - 2:A/B
+      //
+      // Here A in variant (2) is mutually exclusive with variant (1), because
+      // A also exists in (2). This means A->B transition is impossible and
+      // expanded sequence would look like:
+      //
+      // if (A) return ...;
+      // if (B) return ...;
+      if (!count(Preds, PC.Predicate))
+        continue;
       return true;
+    }
   }
   return false;
 }
@@ -1422,6 +1452,15 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions,
   return false;
 }
 
+static std::vector<Record *> getAllPredicates(ArrayRef<TransVariant> Variants) {
+  std::vector<Record *> Preds;
+  for (auto &Variant : Variants) {
+    assert(Variant.VarOrSeqDef->isSubClassOf("SchedVar"));
+    Preds.push_back(Variant.VarOrSeqDef->getValueAsDef("Predicate"));
+  }
+  return Preds;
+}
+
 // Populate IntersectingVariants with any variants or aliased sequences of the
 // given SchedRW whose processor indices and predicates are not mutually
 // exclusive with the given transition.
@@ -1468,6 +1507,7 @@ void PredTransitions::getIntersectingVariants(
     if (AliasProcIdx == 0)
       GenericRW = true;
   }
+  std::vector<Record *> AllPreds = getAllPredicates(Variants);
   for (TransVariant &Variant : Variants) {
     // Don't expand variants if the processor models don't intersect.
     // A zero processor index means any processor.
@@ -1486,11 +1526,10 @@ void PredTransitions::getIntersectingVariants(
                         " Ensure only one SchedAlias exists per RW.");
       }
     }
-    if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) {
-      Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
-      if (mutuallyExclusive(PredDef, TransVec[TransIdx].PredTerm))
-        continue;
-    }
+    Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
+    if (mutuallyExclusive(PredDef, AllPreds, TransVec[TransIdx].PredTerm))
+      continue;
+
     if (IntersectingVariants.empty()) {
       // The first variant builds on the existing transition.
       Variant.TransVecIdx = TransIdx;

diff  --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 615ca9116047..4635dde159ca 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -1446,20 +1446,20 @@ static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
   OS << Buffer;
 }
 
+static bool isTruePredicate(const Record *Rec) {
+  return Rec->isSubClassOf("MCSchedPredicate") &&
+         Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
+}
+
 static void emitPredicates(const CodeGenSchedTransition &T,
                            const CodeGenSchedClass &SC, PredicateExpander &PE,
                            raw_ostream &OS) {
   std::string Buffer;
   raw_string_ostream SS(Buffer);
 
-  auto IsTruePredicate = [](const Record *Rec) {
-    return Rec->isSubClassOf("MCSchedPredicate") &&
-           Rec->getValueAsDef("Pred")->isSubClassOf("MCTrue");
-  };
-
   // If not all predicates are MCTrue, then we need an if-stmt.
   unsigned NumNonTruePreds =
-      T.PredTerm.size() - count_if(T.PredTerm, IsTruePredicate);
+      T.PredTerm.size() - count_if(T.PredTerm, isTruePredicate);
 
   SS.indent(PE.getIndentLevel() * 2);
 
@@ -1471,7 +1471,7 @@ static void emitPredicates(const CodeGenSchedTransition &T,
 
     for (const Record *Rec : T.PredTerm) {
       // Skip predicates that evaluate to "true".
-      if (IsTruePredicate(Rec))
+      if (isTruePredicate(Rec))
         continue;
 
       if (FirstNonTruePredicate) {
@@ -1559,6 +1559,11 @@ static void collectProcessorIndices(const CodeGenSchedClass &SC,
   }
 }
 
+static bool isAlwaysTrue(const CodeGenSchedTransition &T) {
+  return llvm::all_of(T.PredTerm,
+                      [](const Record *R) { return isTruePredicate(R); });
+}
+
 void SubtargetEmitter::emitSchedModelHelpersImpl(
     raw_ostream &OS, bool OnlyExpandMCInstPredicates) {
   IdxVec VariantClasses;
@@ -1601,6 +1606,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
       }
 
       // Now emit transitions associated with processor PI.
+      const CodeGenSchedTransition *FinalT = nullptr;
       for (const CodeGenSchedTransition &T : SC.Transitions) {
         if (PI != 0 && !count(T.ProcIndices, PI))
           continue;
@@ -1615,9 +1621,17 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
         if (OnlyExpandMCInstPredicates && !hasMCSchedPredicates(T))
           continue;
 
+        // If transition is folded to 'return X' it should be the last one.
+        if (isAlwaysTrue(T)) {
+          FinalT = &T;
+          continue;
+        }
         PE.setIndentLevel(3);
         emitPredicates(T, SchedModels.getSchedClass(T.ToClassIdx), PE, OS);
       }
+      if (FinalT)
+        emitPredicates(*FinalT, SchedModels.getSchedClass(FinalT->ToClassIdx),
+                       PE, OS);
 
       OS << "    }\n";
 


        


More information about the llvm-commits mailing list