[llvm] cf0d347 - [GreedyRA ORE] Separate Folder Reloads and Zero Cost Folder Reloads
Serguei Katkov via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 14 00:25:47 PDT 2021
Author: Serguei Katkov
Date: 2021-04-14T14:25:28+07:00
New Revision: cf0d3477aaf5c1d1b11685b852863006fb2d8d9d
URL: https://github.com/llvm/llvm-project/commit/cf0d3477aaf5c1d1b11685b852863006fb2d8d9d
DIFF: https://github.com/llvm/llvm-project/commit/cf0d3477aaf5c1d1b11685b852863006fb2d8d9d.diff
LOG: [GreedyRA ORE] Separate Folder Reloads and Zero Cost Folder Reloads
Patchpoint instructions have operands that are actually zero cost
(or cost the same as a register) when the value is used from the stack.
In terms of statistics it makes sense to count them separately.
Also move from counting instructions related to stack spill/reload to
counting the number of stack slots referenced.
Reviewers: reames, MatzeB, anemet, thegameg
Reviewed By: reames
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D100016
Added:
Modified:
llvm/lib/CodeGen/RegAllocGreedy.cpp
llvm/test/CodeGen/X86/statepoint-ra.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 7941d827b0bd..cede0e3ea556 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -551,16 +551,19 @@ class RAGreedy : public MachineFunctionPass,
struct RAGreedyStats {
unsigned Reloads = 0;
unsigned FoldedReloads = 0;
+ unsigned ZeroCostFoldedReloads = 0;
unsigned Spills = 0;
unsigned FoldedSpills = 0;
bool isEmpty() {
- return !(Reloads || FoldedReloads || Spills || FoldedSpills);
+ return !(Reloads || FoldedReloads || Spills || FoldedSpills ||
+ ZeroCostFoldedReloads);
}
void add(RAGreedyStats other) {
Reloads += other.Reloads;
FoldedReloads += other.FoldedReloads;
+ ZeroCostFoldedReloads += other.ZeroCostFoldedReloads;
Spills += other.Spills;
FoldedSpills += other.FoldedSpills;
}
@@ -3139,6 +3142,9 @@ void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
R << NV("NumReloads", Reloads) << " reloads ";
if (FoldedReloads)
R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ if (ZeroCostFoldedReloads)
+ R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
+ << " zero cost folded reloads ";
}
RAGreedy::RAGreedyStats
@@ -3151,6 +3157,11 @@ RAGreedy::computeNumberOfSplillsReloads(MachineBasicBlock &MBB) {
return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
A->getPseudoValue())->getFrameIndex());
};
+ auto isPatchpointInstr = [](const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT;
+ };
for (MachineInstr &MI : MBB) {
SmallVector<const MachineMemOperand *, 2> Accesses;
@@ -3164,13 +3175,35 @@ RAGreedy::computeNumberOfSplillsReloads(MachineBasicBlock &MBB) {
}
if (TII->hasLoadFromStackSlot(MI, Accesses) &&
llvm::any_of(Accesses, isSpillSlotAccess)) {
- ++Stats.FoldedReloads;
+ if (!isPatchpointInstr(MI)) {
+ Stats.FoldedReloads += Accesses.size();
+ continue;
+ }
+ // For statepoint there may be folded and zero cost folded stack reloads.
+ std::pair<unsigned, unsigned> NonZeroCostRange =
+ TII->getPatchpointUnfoldableRange(MI);
+ SmallSet<unsigned, 16> FoldedReloads;
+ SmallSet<unsigned, 16> ZeroCostFoldedReloads;
+ for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+ if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
+ continue;
+ if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
+ FoldedReloads.insert(MO.getIndex());
+ else
+ ZeroCostFoldedReloads.insert(MO.getIndex());
+ }
+ // If stack slot is used in folded reload it is not zero cost then.
+ for (unsigned Slot : FoldedReloads)
+ ZeroCostFoldedReloads.erase(Slot);
+ Stats.FoldedReloads += FoldedReloads.size();
+ Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
continue;
}
Accesses.clear();
if (TII->hasStoreToStackSlot(MI, Accesses) &&
llvm::any_of(Accesses, isSpillSlotAccess)) {
- ++Stats.FoldedSpills;
+ Stats.FoldedSpills += Accesses.size();
}
}
return Stats;
diff --git a/llvm/test/CodeGen/X86/statepoint-ra.ll b/llvm/test/CodeGen/X86/statepoint-ra.ll
index b5a2c81a0242..d044c02a07d1 100644
--- a/llvm/test/CodeGen/X86/statepoint-ra.ll
+++ b/llvm/test/CodeGen/X86/statepoint-ra.ll
@@ -1,9 +1,22 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true -pass-remarks-filter=regalloc -pass-remarks-output=%t.yaml -stop-after=greedy -o - < %s 2>&1 | FileCheck %s
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
target triple = "x86_64-unknown-linux-gnu"
-; CHECK-NOT: error: ran out of registers during register allocation
+;CHECK-NOT: error: ran out of registers during register allocation
+
+;YAML: --- !Missed
+;YAML: Pass: regalloc
+;YAML: Name: SpillReload
+;YAML: Function: barney
+;YAML: Args:
+;YAML: - NumSpills: '10'
+;YAML: - String: ' spills '
+;YAML: - NumReloads: '7'
+;YAML: - String: ' reloads '
+;YAML: - NumZeroCostFoldedReloads: '20'
+;YAML: - String: ' zero cost folded reloads '
+;YAML: - String: generated in function
define void @barney(i8 addrspace(1)* %arg, double %arg1, double %arg2, double %arg3, double %arg4, double %arg5, double %arg6, double %arg7, double %arg8, double %arg9, double %arg10, double %arg11, double %arg12) gc "statepoint-example" personality i32* ()* @widget {
bb:
@@ -46,3 +59,98 @@ declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 , i32 , void ()*,
declare token @llvm.experimental.gc.statepoint.p0f_i32p1i8f64f64f64f64f64f64f64f64f64f(i64 , i32 , i32 (i8 addrspace(1)*, double, double, double, double, double, double, double, double, double)*, i32 , i32 , ...)
declare token @llvm.experimental.gc.statepoint.p0f_i32i32p1i8i32f(i64 , i32 , i32 (i32, i8 addrspace(1)*, i32)*, i32 , i32 , ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i8f64f64f64f64f64f64f64f64f64i32f(i64 , i32 , void (i8 addrspace(1)*, double, double, double, double, double, double, double, double, double, i32)*, i32 , i32 , ...)
+
+;CHECK: body: |
+;CHECK: bb.0.bb:
+;CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+;CHECK: liveins: $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7
+;CHECK: %49:fr64 = COPY $xmm7
+;CHECK: %10:fr64 = COPY $xmm6
+;CHECK: %41:fr64 = COPY $xmm5
+;CHECK: %45:fr64 = COPY $xmm4
+;CHECK: %53:fr64 = COPY $xmm3
+;CHECK: %6:fr64 = COPY $xmm2
+;CHECK: %58:fr64 = COPY $xmm1
+;CHECK: %62:fr64 = COPY $xmm0
+;CHECK: %3:gr64 = COPY $rdi
+;CHECK: %76:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0)
+;CHECK: %14:fr64 = MOVSDrm_alt %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1, align 16)
+;CHECK: %66:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2)
+;CHECK: %71:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16)
+;CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store 8 into %stack.0)
+;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %62, 2, 7, 2, 0, 2, 4, %58, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %53, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %41, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %49, 2, 7, 2, 0, 2, 4, %71, 2, 7, 2, 0, 2, 4, %66, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %76, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store 8 on %stack.0)
+;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: %17:gr32 = MOV32r0 implicit-def dead $eflags
+;CHECK: TEST8rr %17.sub_8bit, %17.sub_8bit, implicit-def $eflags
+;CHECK: MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %41 :: (store 8 into %stack.1)
+;CHECK: MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %45 :: (store 8 into %stack.2)
+;CHECK: MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %58 :: (store 8 into %stack.5)
+;CHECK: MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %62 :: (store 8 into %stack.6)
+;CHECK: JCC_1 %bb.2, 4, implicit killed $eflags
+;CHECK: bb.1:
+;CHECK: successors: %bb.3(0x80000000)
+;CHECK: %54:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
+;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.3)
+;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.4)
+;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %54 :: (store 8 into %stack.7)
+;CHECK: JMP_1 %bb.3
+;CHECK: bb.2.bb13:
+;CHECK: successors: %bb.3(0x80000000)
+;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %14 :: (store 8 into stack)
+;CHECK: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
+;CHECK: $xmm0 = COPY %62
+;CHECK: $xmm1 = COPY %58
+;CHECK: $xmm2 = COPY %6
+;CHECK: $xmm3 = COPY %45
+;CHECK: $xmm4 = COPY %41
+;CHECK: $xmm5 = COPY %10
+;CHECK: $xmm6 = COPY %71
+;CHECK: $xmm7 = COPY %66
+;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %49 :: (store 8 into %stack.3)
+;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %53 :: (store 8 into %stack.4)
+;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %76 :: (store 8 into %stack.7)
+;CHECK: STATEPOINT 2, 5, 9, undef %22:gr64, $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, 2, 0, 2, 0, 2, 59, 2, 0, 2, 2, 2, 0, 2, 70, 2, 0, 2, 26, 2, 0, 2, 0, 2, 0, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.0, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load 8 from %stack.1), (load 8 from %stack.2), (load 8 from %stack.3), (load 8 from %stack.4), (load 8 from %stack.5), (load 8 from %stack.6), (load 8 from %fixed-stack.2), (load 8 from %fixed-stack.3, align 16), (load 8 from %fixed-stack.0)
+;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: bb.3.bb15:
+;CHECK: successors: %bb.7(0x7ffff800), %bb.4(0x00000800)
+;CHECK: %24:gr32 = MOV32r0 implicit-def dead $eflags
+;CHECK: TEST8rr %24.sub_8bit, %24.sub_8bit, implicit-def $eflags
+;CHECK: JCC_1 %bb.7, 5, implicit killed $eflags
+;CHECK: JMP_1 %bb.4
+;CHECK: bb.4.bb19:
+;CHECK: successors: %bb.5(0x00000000), %bb.6(0x80000000)
+;CHECK: EH_LABEL <mcsymbol >
+;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: $edx = MOV32r0 implicit-def dead $eflags
+;CHECK: STATEPOINT 1, 16, 3, undef %29:gr64, undef $edi, undef $rsi, $edx, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 2, 2, 2, 3, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load 8 from %stack.1), (load 8 from %stack.2), (load 8 from %stack.3), (load 8 from %stack.4), (load 8 from %stack.5), (load 8 from %stack.6), (load 8 from %fixed-stack.2), (load 8 from %fixed-stack.3, align 16), (load 8 from %stack.7)
+;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: EH_LABEL <mcsymbol >
+;CHECK: JMP_1 %bb.5
+;CHECK: bb.5.bb21:
+;CHECK: successors:
+;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: %81:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load 8 from %stack.7)
+;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %81 :: (store 8 into stack)
+;CHECK: $xmm0 = MOVSDrm_alt %stack.6, 1, $noreg, 0, $noreg :: (load 8 from %stack.6)
+;CHECK: $xmm1 = MOVSDrm_alt %stack.5, 1, $noreg, 0, $noreg :: (load 8 from %stack.5)
+;CHECK: $xmm2 = MOVSDrm_alt %stack.4, 1, $noreg, 0, $noreg :: (load 8 from %stack.4)
+;CHECK: $xmm3 = MOVSDrm_alt %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2)
+;CHECK: $xmm4 = MOVSDrm_alt %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
+;CHECK: $xmm5 = MOVSDrm_alt %stack.3, 1, $noreg, 0, $noreg :: (load 8 from %stack.3)
+;CHECK: %74:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.3, align 16)
+;CHECK: %95:fr64 = COPY %74
+;CHECK: $xmm6 = COPY %95
+;CHECK: $esi = MOV32ri 51
+;CHECK: %69:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2)
+;CHECK: %97:fr64 = COPY %69
+;CHECK: $xmm7 = COPY %97
+;CHECK: STATEPOINT 2, 5, 10, undef %36:gr64, undef $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, killed $esi, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 2, 2, 46, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 3, 2, 51, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load 8 from %stack.7), (load 8 from %stack.6), (load 8 from %stack.5), (load 8 from %stack.4), (load 8 from %stack.2), (load 8 from %stack.1), (load 8 from %stack.3), (load 8 from %fixed-stack.3, align 16), (load 8 from %fixed-stack.2)
+;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+;CHECK: bb.6.bb23 (landing-pad):
+;CHECK: liveins: $rax, $rdx
+;CHECK: EH_LABEL <mcsymbol >
+;CHECK: RET 0
+;CHECK: bb.7.bb25:
+;CHECK: RET 0
More information about the llvm-commits
mailing list