[llvm] 78a871a - [ARM] Use ProcResGroup in Cortex-M7 scheduling model
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 19 13:23:17 PDT 2021
Author: David Penry
Date: 2021-04-19T21:23:05+01:00
New Revision: 78a871abf7018f4a288b773c9c89f99cd5c66b9c
URL: https://github.com/llvm/llvm-project/commit/78a871abf7018f4a288b773c9c89f99cd5c66b9c
DIFF: https://github.com/llvm/llvm-project/commit/78a871abf7018f4a288b773c9c89f99cd5c66b9c.diff
LOG: [ARM] Use ProcResGroup in Cortex-M7 scheduling model
Used to model structural hazards on FP issue, where some
instructions take up two issue slots and others one, as well
as similar structural hazards on load issue, where some
instructions take up two load lanes and others one.
Differential Revision: https://reviews.llvm.org/D98977
Added:
llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
Modified:
llvm/lib/Target/ARM/ARMScheduleM7.td
llvm/test/tools/llvm-mca/ARM/m7-fp.s
llvm/test/tools/llvm-mca/ARM/m7-int.s
llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMScheduleM7.td b/llvm/lib/Target/ARM/ARMScheduleM7.td
index 12296ad09218d..25bc8401ca84a 100644
--- a/llvm/lib/Target/ARM/ARMScheduleM7.td
+++ b/llvm/lib/Target/ARM/ARMScheduleM7.td
@@ -19,6 +19,8 @@ def CortexM7Model : SchedMachineModel {
let CompleteModel = 0;
}
+let SchedModel = CortexM7Model in {
+
//===--------------------------------------------------------------------===//
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
// pipe. The stages relevant to scheduling are as follows:
@@ -33,7 +35,9 @@ def CortexM7Model : SchedMachineModel {
// for scheduling, so simple ALU operations executing in EX2 will have
// ReadAdvance<0> (the default) for their source operands and Latency = 1.
-def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
+def M7UnitLoadL : ProcResource<1> { let BufferSize = 0; }
+def M7UnitLoadH : ProcResource<1> { let BufferSize = 0; }
+def M7UnitLoad : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]> { let BufferSize = 0; }
def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
def M7UnitALU : ProcResource<2>;
def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
@@ -41,14 +45,14 @@ def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
-def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
+def M7UnitVPortL : ProcResource<1> { let BufferSize = 0; }
+def M7UnitVPortH : ProcResource<1> { let BufferSize = 0; }
+def M7UnitVPort : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]> { let BufferSize = 0; }
def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
//===---------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types with map ProcResources and set latency.
-let SchedModel = CortexM7Model in {
-
def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
// Basic ALU with shifts.
@@ -105,39 +109,42 @@ def : WriteRes<WriteNoop, []> { let Latency = 0; }
// Floating point conversions.
def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
+def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]> {
+ let Latency = 3;
+}
// The FP pipeline has a latency of 3 cycles.
// ALU operations (32/64-bit). These go down the FP pipeline.
def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
-def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 4;
let BeginGroup = 1;
}
// Multiplication
def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
-def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 7;
let BeginGroup = 1;
}
// Multiply-accumulate. FPMAC goes down the FP Pipeline.
def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
-def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 11;
let BeginGroup = 1;
}
// Division. Effective scheduling latency is 3, though real latency is larger
def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
-def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 30;
let BeginGroup = 1;
}
// Square-root. Effective scheduling latency is 3; real latency is larger
def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
-def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
+def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 30;
let BeginGroup = 1;
}
@@ -283,12 +290,12 @@ def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
// VFP loads and stores
def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
-def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
+def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 2;
let SingleIssue = 1;
}
def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
-def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
+def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]> {
let SingleIssue = 1;
}
diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
new file mode 100644
index 0000000000000..402311e6eeb40
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
@@ -0,0 +1,39 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck %s
+---
+name: test_groups
+alignment: 2
+tracksRegLiveness: true
+liveins:
+ - { reg: '$d0' }
+ - { reg: '$r0' }
+ - { reg: '$r1' }
+ - { reg: '$r2' }
+ - { reg: '$r3' }
+ - { reg: '$r4' }
+frameInfo:
+ maxAlignment: 1
+ maxCallFrameSize: 0
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $r0, $r1, $r2, $r3, $r4
+
+ ; CHECK-LABEL: name: test_groups
+ ; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
+ ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
+ ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
+ renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
+ renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
+ VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
+ renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
+ t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
+ renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
+ tBX_RET 14 /* CC::al */, $noreg, implicit $d0
+
+...
diff --git a/llvm/test/tools/llvm-mca/ARM/m7-fp.s b/llvm/test/tools/llvm-mca/ARM/m7-fp.s
index 64b293753d47b..dcf9723461dec 100644
--- a/llvm/test/tools/llvm-mca/ARM/m7-fp.s
+++ b/llvm/test/tools/llvm-mca/ARM/m7-fp.s
@@ -253,23 +253,23 @@ vstr.f32 s0, [r0]
# CHECK-NEXT: [0.0] - M7UnitALU
# CHECK-NEXT: [0.1] - M7UnitALU
# CHECK-NEXT: [1] - M7UnitBranch
-# CHECK-NEXT: [2.0] - M7UnitLoad
-# CHECK-NEXT: [2.1] - M7UnitLoad
-# CHECK-NEXT: [3] - M7UnitMAC
-# CHECK-NEXT: [4] - M7UnitSIMD
-# CHECK-NEXT: [5] - M7UnitShift1
-# CHECK-NEXT: [6] - M7UnitShift2
-# CHECK-NEXT: [7] - M7UnitStore
-# CHECK-NEXT: [8] - M7UnitVFP
-# CHECK-NEXT: [9.0] - M7UnitVPort
-# CHECK-NEXT: [9.1] - M7UnitVPort
+# CHECK-NEXT: [2] - M7UnitLoadH
+# CHECK-NEXT: [3] - M7UnitLoadL
+# CHECK-NEXT: [4] - M7UnitMAC
+# CHECK-NEXT: [5] - M7UnitSIMD
+# CHECK-NEXT: [6] - M7UnitShift1
+# CHECK-NEXT: [7] - M7UnitShift2
+# CHECK-NEXT: [8] - M7UnitStore
+# CHECK-NEXT: [9] - M7UnitVFP
+# CHECK-NEXT: [10] - M7UnitVPortH
+# CHECK-NEXT: [11] - M7UnitVPortL
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
-# CHECK-NEXT: - - - 1.00 1.00 - - - - 2.00 104.00 81.00 81.00
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: - - - 1.50 1.50 - - - - 2.00 104.00 81.00 81.00
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vabs.f32 s0, s2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vabs.f64 d0, d2
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vadd.f32 s0, s2, s1
@@ -384,7 +384,7 @@ vstr.f32 s0, [r0]
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsqrt.f64 d0, d2
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vsub.f32 s0, s2, s1
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsub.f64 d0, d2, d1
-# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 1.00 1.00 vldr d0, [r0]
+# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r0]
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.50 vldr s0, [r0]
# CHECK-NEXT: - - - - - - - - - 1.00 - 1.00 1.00 vstr d0, [r0]
# CHECK-NEXT: - - - - - - - - - 1.00 - 0.50 0.50 vstr s0, [r0]
diff --git a/llvm/test/tools/llvm-mca/ARM/m7-int.s b/llvm/test/tools/llvm-mca/ARM/m7-int.s
index 4768dcf296bc7..4ec6ed56c924e 100644
--- a/llvm/test/tools/llvm-mca/ARM/m7-int.s
+++ b/llvm/test/tools/llvm-mca/ARM/m7-int.s
@@ -862,23 +862,23 @@ yield
# CHECK-NEXT: [0.0] - M7UnitALU
# CHECK-NEXT: [0.1] - M7UnitALU
# CHECK-NEXT: [1] - M7UnitBranch
-# CHECK-NEXT: [2.0] - M7UnitLoad
-# CHECK-NEXT: [2.1] - M7UnitLoad
-# CHECK-NEXT: [3] - M7UnitMAC
-# CHECK-NEXT: [4] - M7UnitSIMD
-# CHECK-NEXT: [5] - M7UnitShift1
-# CHECK-NEXT: [6] - M7UnitShift2
-# CHECK-NEXT: [7] - M7UnitStore
-# CHECK-NEXT: [8] - M7UnitVFP
-# CHECK-NEXT: [9.0] - M7UnitVPort
-# CHECK-NEXT: [9.1] - M7UnitVPort
+# CHECK-NEXT: [2] - M7UnitLoadH
+# CHECK-NEXT: [3] - M7UnitLoadL
+# CHECK-NEXT: [4] - M7UnitMAC
+# CHECK-NEXT: [5] - M7UnitSIMD
+# CHECK-NEXT: [6] - M7UnitShift1
+# CHECK-NEXT: [7] - M7UnitShift2
+# CHECK-NEXT: [8] - M7UnitStore
+# CHECK-NEXT: [9] - M7UnitVFP
+# CHECK-NEXT: [10] - M7UnitVPortH
+# CHECK-NEXT: [11] - M7UnitVPortL
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 125.00 125.00 - 35.00 35.00 43.00 90.00 88.00 2.00 45.00 - - -
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adc r0, r1, #0
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1, #0
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1
diff --git a/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s b/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
index ad9a2b1ad0c20..a63e7486bf600 100644
--- a/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
+++ b/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s
@@ -34,26 +34,26 @@ vldr d0, [r1]
# CHECK-NEXT: [0.0] - M7UnitALU
# CHECK-NEXT: [0.1] - M7UnitALU
# CHECK-NEXT: [1] - M7UnitBranch
-# CHECK-NEXT: [2.0] - M7UnitLoad
-# CHECK-NEXT: [2.1] - M7UnitLoad
-# CHECK-NEXT: [3] - M7UnitMAC
-# CHECK-NEXT: [4] - M7UnitSIMD
-# CHECK-NEXT: [5] - M7UnitShift1
-# CHECK-NEXT: [6] - M7UnitShift2
-# CHECK-NEXT: [7] - M7UnitStore
-# CHECK-NEXT: [8] - M7UnitVFP
-# CHECK-NEXT: [9.0] - M7UnitVPort
-# CHECK-NEXT: [9.1] - M7UnitVPort
+# CHECK-NEXT: [2] - M7UnitLoadH
+# CHECK-NEXT: [3] - M7UnitLoadL
+# CHECK-NEXT: [4] - M7UnitMAC
+# CHECK-NEXT: [5] - M7UnitSIMD
+# CHECK-NEXT: [6] - M7UnitShift1
+# CHECK-NEXT: [7] - M7UnitShift2
+# CHECK-NEXT: [8] - M7UnitStore
+# CHECK-NEXT: [9] - M7UnitVFP
+# CHECK-NEXT: [10] - M7UnitVPortH
+# CHECK-NEXT: [11] - M7UnitVPortL
# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
-# CHECK-NEXT: 1.00 1.00 - - 1.00 - - - - - - - 2.00
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
+# CHECK-NEXT: 1.00 1.00 - 1.00 1.00 - - - - - - 1.00 1.00
# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
+# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - - - - - - add.w r1, r1, #1
# CHECK-NEXT: 1.00 - - - - - - - - - - - - add.w r1, r1, #2
-# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
+# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r1]
# CHECK: Timeline view:
# CHECK-NEXT: Index 012345
More information about the llvm-commits
mailing list