[llvm] 7f3170e - [MachineSink] Add a loop sink limit
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 17 01:03:06 PST 2021
Author: Sjoerd Meijer
Date: 2021-02-17T08:50:53Z
New Revision: 7f3170ec1943a25a09beb0a989ebc83c9c238f97
URL: https://github.com/llvm/llvm-project/commit/7f3170ec1943a25a09beb0a989ebc83c9c238f97
DIFF: https://github.com/llvm/llvm-project/commit/7f3170ec1943a25a09beb0a989ebc83c9c238f97.diff
LOG: [MachineSink] Add a loop sink limit
To make sure compile times don't regress, add an option that restricts the
number of instructions considered for loop sinking, because alias analysis can
be expensive. For the same reason, also skip sinking into large blocks.
Differential Revision: https://reviews.llvm.org/D96485
Added:
llvm/test/CodeGen/AArch64/loop-sink-limit.mir
Modified:
llvm/lib/CodeGen/MachineSink.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 832147ebd0a9..0ac176ede827 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -97,6 +97,11 @@ SinkInstsIntoLoop("sink-insts-to-avoid-spills",
"register spills"),
cl::init(false), cl::Hidden);
+static cl::opt<unsigned> SinkIntoLoopLimit(
+ "machine-sink-loop-limit",
+ cl::desc("The maximum number of instructions considered for loop sinking."),
+ cl::init(50), cl::Hidden);
+
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
STATISTIC(NumSplit, "Number of critical edges split");
@@ -468,7 +473,15 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// Walk the candidates in reverse order so that we start with the use
// of a def-use chain, if there is any.
+ // TODO: Sort the candidates using a cost-model.
+ unsigned i = 0;
for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+ if (i++ == SinkIntoLoopLimit) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
+ "be analysed.");
+ break;
+ }
+
MachineInstr *I = *It;
if (!SinkIntoLoop(L, *I))
break;
@@ -1243,6 +1256,10 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
return false;
}
+ if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
+ LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
+ return false;
+ }
LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I);
diff --git a/llvm/test/CodeGen/AArch64/loop-sink-limit.mir b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir
new file mode 100644
index 000000000000..2d85f023f0e4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/loop-sink-limit.mir
@@ -0,0 +1,178 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
+# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \
+# RUN: FileCheck %s --check-prefix=SINK1
+#
+# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
+# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \
+# RUN: FileCheck %s --check-prefix=SINK2
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64"
+
+ @A = external dso_local global [100 x i32], align 4
+ %struct.A = type { i32, i32, i32, i32, i32, i32 }
+
+ define i32 @do_sink_use_is_not_a_copy(i32 %n) {
+ entry:
+ %cmp63 = icmp sgt i32 %n, 0
+ br i1 %cmp63, label %for.body.preheader, label %for.cond.cleanup
+
+ for.body.preheader: ; preds = %entry
+ %0 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0), align 4
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ %n, %entry ], [ %div, %for.body ]
+ ret i32 %sum.0.lcssa
+
+ for.body: ; preds = %for.body, %for.body.preheader
+ %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+ %sum.065 = phi i32 [ %div, %for.body ], [ %n, %for.body.preheader ]
+ %div = sdiv i32 %sum.065, %0
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ }
+
+...
+---
+name: do_sink_use_is_not_a_copy
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+ - { id: 0, class: gpr32, preferred-register: '' }
+ - { id: 1, class: gpr32all, preferred-register: '' }
+ - { id: 2, class: gpr32sp, preferred-register: '' }
+ - { id: 3, class: gpr32, preferred-register: '' }
+ - { id: 4, class: gpr32all, preferred-register: '' }
+ - { id: 5, class: gpr32all, preferred-register: '' }
+ - { id: 6, class: gpr32common, preferred-register: '' }
+ - { id: 7, class: gpr32, preferred-register: '' }
+ - { id: 8, class: gpr64common, preferred-register: '' }
+ - { id: 9, class: gpr32, preferred-register: '' }
+ - { id: 10, class: gpr32, preferred-register: '' }
+ - { id: 11, class: gpr32, preferred-register: '' }
+liveins:
+ - { reg: '$w0', virtual-reg: '%6' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; SINK1-LABEL: name: do_sink_use_is_not_a_copy
+ ; SINK1: bb.0.entry:
+ ; SINK1: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ ; SINK1: liveins: $w0
+ ; SINK1: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+ ; SINK1: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+ ; SINK1: Bcc 11, %bb.2, implicit $nzcv
+ ; SINK1: B %bb.1
+ ; SINK1: bb.1.for.body.preheader:
+ ; SINK1: successors: %bb.3(0x80000000)
+ ; SINK1: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
+ ; SINK1: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
+ ; SINK1: B %bb.3
+ ; SINK1: bb.2.for.cond.cleanup:
+ ; SINK1: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
+ ; SINK1: $w0 = COPY [[PHI]]
+ ; SINK1: RET_ReallyLR implicit $w0
+ ; SINK1: bb.3.for.body:
+ ; SINK1: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+ ; SINK1: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
+ ; SINK1: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+ ; SINK1: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
+ ; SINK1: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+ ; SINK1: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
+ ; SINK1: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+ ; SINK1: Bcc 0, %bb.2, implicit $nzcv
+ ; SINK1: B %bb.3
+ ; SINK2-LABEL: name: do_sink_use_is_not_a_copy
+ ; SINK2: bb.0.entry:
+ ; SINK2: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ ; SINK2: liveins: $w0
+ ; SINK2: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+ ; SINK2: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+ ; SINK2: Bcc 11, %bb.2, implicit $nzcv
+ ; SINK2: B %bb.1
+ ; SINK2: bb.1.for.body.preheader:
+ ; SINK2: successors: %bb.3(0x80000000)
+ ; SINK2: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) @A
+ ; SINK2: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui killed [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
+ ; SINK2: B %bb.3
+ ; SINK2: bb.2.for.cond.cleanup:
+ ; SINK2: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %4, %bb.3
+ ; SINK2: $w0 = COPY [[PHI]]
+ ; SINK2: RET_ReallyLR implicit $w0
+ ; SINK2: bb.3.for.body:
+ ; SINK2: successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+ ; SINK2: [[PHI1:%[0-9]+]]:gpr32sp = PHI [[COPY]], %bb.1, %5, %bb.3
+ ; SINK2: [[PHI2:%[0-9]+]]:gpr32 = PHI [[COPY]], %bb.1, %4, %bb.3
+ ; SINK2: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[PHI2]], [[LDRWui]]
+ ; SINK2: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SDIVWr]]
+ ; SINK2: [[SUBSWri1:%[0-9]+]]:gpr32 = SUBSWri [[PHI1]], 1, 0, implicit-def $nzcv
+ ; SINK2: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBSWri1]]
+ ; SINK2: Bcc 0, %bb.2, implicit $nzcv
+ ; SINK2: B %bb.3
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ liveins: $w0
+
+ %6:gpr32common = COPY $w0
+ %7:gpr32 = SUBSWri %6, 1, 0, implicit-def $nzcv
+ Bcc 11, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1.for.body.preheader:
+ successors: %bb.3(0x80000000)
+
+ %8:gpr64common = ADRP target-flags(aarch64-page) @A
+ %9:gpr32 = LDRWui killed %8, target-flags(aarch64-pageoff, aarch64-nc) @A :: (dereferenceable load 4 from `i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i64 0, i64 0)`)
+ B %bb.3
+
+ bb.2.for.cond.cleanup:
+ %1:gpr32all = PHI %6, %bb.0, %4, %bb.3
+ $w0 = COPY %1
+ RET_ReallyLR implicit $w0
+
+ bb.3.for.body:
+ successors: %bb.2(0x04000000), %bb.3(0x7c000000)
+
+ %2:gpr32sp = PHI %6, %bb.1, %5, %bb.3
+ %3:gpr32 = PHI %6, %bb.1, %4, %bb.3
+ %10:gpr32 = SDIVWr %3, %9
+ %4:gpr32all = COPY %10
+ %11:gpr32 = SUBSWri %2, 1, 0, implicit-def $nzcv
+ %5:gpr32all = COPY %11
+ Bcc 0, %bb.2, implicit $nzcv
+ B %bb.3
+
+...
More information about the llvm-commits
mailing list