[llvm] 7b90516 - [ARM][LowOverheadLoops] Start insertion point
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 1 02:07:21 PDT 2020
Author: Sam Parker
Date: 2020-10-01T10:05:25+01:00
New Revision: 7b90516d479ca6aadf4e261747d62c854d6f5463
URL: https://github.com/llvm/llvm-project/commit/7b90516d479ca6aadf4e261747d62c854d6f5463
DIFF: https://github.com/llvm/llvm-project/commit/7b90516d479ca6aadf4e261747d62c854d6f5463.diff
LOG: [ARM][LowOverheadLoops] Start insertion point
If possible, try not to move the insertion point of the loop start
instruction earlier than its current position.
Differential Revision: https://reviews.llvm.org/D88542
Added:
Modified:
llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index c86cf3235732..fe28470c99ec 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1015,9 +1015,11 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
// Find a suitable position to insert the loop start instruction. It needs to
// be able to safely define LR.
auto FindStartInsertionPoint = [](MachineInstr *Start,
+ MachineInstr *Dec,
MachineBasicBlock::iterator &InsertPt,
MachineBasicBlock *&InsertBB,
- ReachingDefAnalysis &RDA) {
+ ReachingDefAnalysis &RDA,
+ InstSet &ToRemove) {
// We can define LR because LR already contains the same value.
if (Start->getOperand(0).getReg() == ARM::LR) {
InsertPt = MachineBasicBlock::iterator(Start);
@@ -1033,23 +1035,29 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
MI->getOperand(2).getImm() == ARMCC::AL;
};
- MachineBasicBlock *MBB = Start->getParent();
-
// Find an insertion point:
// - Is there a (mov lr, Count) before Start? If so, and nothing else
- // writes to Count before Start, we can insert at that mov.
+ // writes to Count before Start, we can insert at start.
if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) {
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) {
- InsertPt = MachineBasicBlock::iterator(LRDef);
- InsertBB = LRDef->getParent();
+ SmallPtrSet<MachineInstr *, 2> Ignore = { Dec };
+ if (!TryRemove(LRDef, RDA, ToRemove, Ignore))
+ return false;
+ InsertPt = MachineBasicBlock::iterator(Start);
+ InsertBB = Start->getParent();
return true;
}
}
// - Is there a (mov lr, Count) after Start? If so, and nothing else writes
- // to Count after Start, we can insert at that mov.
+ // to Count after Start, we can insert at that mov (which will now be
+ // dead).
+ MachineBasicBlock *MBB = Start->getParent();
if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) {
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) {
+ SmallPtrSet<MachineInstr *, 2> Ignore = { Start, Dec };
+ if (!TryRemove(LRDef, RDA, ToRemove, Ignore))
+ return false;
InsertPt = MachineBasicBlock::iterator(LRDef);
InsertBB = LRDef->getParent();
return true;
@@ -1066,7 +1074,8 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
return true;
};
- if (!FindStartInsertionPoint(Start, StartInsertPt, StartInsertBB, RDA)) {
+ if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA,
+ ToRemove)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
return;
@@ -1411,9 +1420,6 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
// Collect and remove the users of iteration count.
SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
LoLoop.End };
- if (LoLoop.StartInsertPt != LoLoop.StartInsertBB->end())
- Killed.insert(&*LoLoop.StartInsertPt);
-
if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
}
@@ -1439,9 +1445,6 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
if (!IsDo)
MIB.add(Start->getOperand(1));
- // If we're inserting at a mov lr, then remove it as it's redundant.
- if (InsertPt != MBB->end())
- LoLoop.ToRemove.insert(&*InsertPt);
LoLoop.ToRemove.insert(Start);
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
return &*MIB;
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
index f63d3fde7dee..2ee932acb840 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -78,12 +78,12 @@ body: |
; CHECK: successors: %bb.5(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
- ; CHECK: $lr = t2DLS killed $r4
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0
; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg
; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg
+ ; CHECK: $lr = t2DLS killed $r4
; CHECK: renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: bb.5:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
index f59a322e1433..f2cb5547c7dd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-prev-iteration.mir
@@ -273,7 +273,6 @@ body: |
; CHECK: renamable $r5 = tLDRr renamable $r1, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep617)
; CHECK: renamable $r7, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $r6 = tLDRr renamable $r2, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep418)
- ; CHECK: dead $r12 = tMOVr $lr, 14 /* CC::al */, $noreg
; CHECK: renamable $r8 = nuw t2ADDri killed renamable $r8, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r5, dead $cpsr = tEOR killed renamable $r5, killed renamable $r6, 14 /* CC::al */, $noreg
; CHECK: renamable $r6 = tLDRr renamable $r0, $r3, 14 /* CC::al */, $noreg :: (load 4 from %ir.scevgep219)
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index a4b094020fcb..b71c2dd7aaa0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -175,8 +175,8 @@ body: |
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
- ; CHECK: $lr = t2DLS killed $r4
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0
+ ; CHECK: $lr = t2DLS killed $r4
; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index b97204c69f32..0e182ece4a75 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -26,10 +26,10 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
-; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vadd.f32 s0, s3, s3
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: vcvt.f32.u32 s4, s4
+; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vdiv.f32 s0, s0, s4
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: vmov.i32 q0, #0x0
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 165bf72c7187..a43f564951e9 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1435,8 +1435,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(%struct.arm_biqu
; CHECK-NEXT: vdup.32 q1, r6
; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: vmov.f32 s6, s12
-; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: vmov.f32 s10, s14
+; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: vmov.f32 s7, s12
; CHECK-NEXT: vmov.f32 s11, s14
; CHECK-NEXT: .LBB17_3: @ Parent Loop BB17_2 Depth=1
More information about the llvm-commits mailing list