[llvm] e4b9867 - [ARM] Expand cannotInsertWDLSTPBetween to the last instruction
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 28 01:15:22 PDT 2020
Author: David Green
Date: 2020-09-28T09:14:40+01:00
New Revision: e4b9867cb69acc272157ef1baaecd9848ad18ecc
URL: https://github.com/llvm/llvm-project/commit/e4b9867cb69acc272157ef1baaecd9848ad18ecc
DIFF: https://github.com/llvm/llvm-project/commit/e4b9867cb69acc272157ef1baaecd9848ad18ecc.diff
LOG: [ARM] Expand cannotInsertWDLSTPBetween to the last instruction
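Commit 9d9a11c7be037 added this check for predicable instructions between the
D/WLSTP and the loop's start, but the scan stopped at the block's last
instruction without inspecting it. Pass iterators instead and scan up to
InsertBB->end(), so that the last instruction in the block is checked as well.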
Differential Revision: https://reviews.llvm.org/D88354
Added:
Modified:
llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index ff50d94d328f..f01acd8adfbf 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -629,17 +629,15 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
// width, the Loop Start instruction will immediately generate one or more
// false lane mask which can, incorrectly, affect the proceeding MVE
// instructions in the preheader.
- auto cannotInsertWDLSTPBetween = [](MachineInstr *Begin,
- MachineInstr *End) {
- auto I = MachineBasicBlock::iterator(Begin);
- auto E = MachineBasicBlock::iterator(End);
+ auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
for (; I != E; ++I)
if (shouldInspect(*I))
return true;
return false;
};
- if (cannotInsertWDLSTPBetween(StartInsertPt, &InsertBB->back()))
+ if (cannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end()))
return false;
// Especially in the case of while loops, InsertBB may not be the
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
index e5d629c8730c..429a88884db9 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -46,6 +46,7 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8
; CHECK: tCMPi8 renamable $r1, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: renamable $r12 = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: tBcc %bb.2, 2 /* CC::hs */, killed $cpsr
; CHECK: bb.1:
; CHECK: liveins: $r2
@@ -54,24 +55,31 @@ body: |
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
- ; CHECK: liveins: $r0, $r1, $r2
+ ; CHECK: liveins: $r0, $r1, $r2, $r12
+ ; CHECK: renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+ ; CHECK: tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: t2IT 11, 8, implicit-def $itstate
+ ; CHECK: $r12 = tMOVr renamable $r1, 11 /* CC::lt */, killed $cpsr, implicit killed renamable $r12, implicit killed $itstate
+ ; CHECK: renamable $r3 = t2SUBrr renamable $r1, killed renamable $r12, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
; CHECK: $r12 = tMOVr $r1, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
; CHECK: bb.3:
; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000)
- ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3
+ ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4
; CHECK: renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r3, 0, 0, $noreg
; CHECK: renamable $q0 = nnan ninf nsz MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
; CHECK: renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
- ; CHECK: liveins: $q0, $r0, $r1, $r2
+ ; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
- ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
+ ; CHECK: $lr = t2DLS killed $r4
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg
; CHECK: renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0
; CHECK: $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg
@@ -80,13 +88,16 @@ body: |
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: bb.5:
; CHECK: successors: %bb.5(0x7c000000), %bb.6(0x04000000)
- ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $s4
+ ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $s4
+ ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: $r4 = VMOVRS $s4, 14 /* CC::al */, $noreg
- ; CHECK: renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
- ; CHECK: renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 0, $noreg, undef renamable $q2
- ; CHECK: renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg
+ ; CHECK: MVE_VPST 2, implicit $vpr
+ ; CHECK: renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr
+ ; CHECK: renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 1, renamable $vpr, undef renamable $q2
+ ; CHECK: renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 1, killed renamable $vpr
+ ; CHECK: renamable $r3, dead $cpsr = nsw tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg
- ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.5
+ ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.5
; CHECK: bb.6:
; CHECK: liveins: $q0, $r1, $r2
; CHECK: renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index 3cd24f8f5247..c176fcabdfb6 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -6,10 +6,19 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: mov r3, r1
+; CHECK-NEXT: cmp r1, #4
+; CHECK-NEXT: it ge
+; CHECK-NEXT: movge r3, #4
+; CHECK-NEXT: mov.w r12, #1
+; CHECK-NEXT: subs r3, r1, r3
; CHECK-NEXT: vmov.i32 q0, #0x0
+; CHECK-NEXT: adds r3, #3
+; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dlstp.32 lr, r3
+; CHECK-NEXT: mov r4, lr
; CHECK-NEXT: .LBB0_1: @ %do.body.i
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r12], #16
@@ -18,7 +27,7 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: mov r3, r1
-; CHECK-NEXT: dlstp.32 lr, r3
+; CHECK-NEXT: dls lr, r4
; CHECK-NEXT: vadd.f32 s0, s3, s3
; CHECK-NEXT: vcvt.f32.u32 s4, s4
; CHECK-NEXT: vdiv.f32 s0, s0, s4
@@ -26,10 +35,13 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB0_3: @ %do.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vsub.f32 q1, q1, r12
-; CHECK-NEXT: vfma.f32 q0, q1, q1
-; CHECK-NEXT: letp lr, .LBB0_3
+; CHECK-NEXT: vctp.32 r3
+; CHECK-NEXT: subs r3, #4
+; CHECK-NEXT: vpsttt
+; CHECK-NEXT: vldrwt.u32 q1, [r0], #16
+; CHECK-NEXT: vsubt.f32 q1, q1, r12
+; CHECK-NEXT: vfmat.f32 q0, q1, q1
+; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %do.end
; CHECK-NEXT: subs r0, r1, #1
; CHECK-NEXT: vadd.f32 s0, s3, s3
More information about the llvm-commits mailing list