[llvm] 6ec5f32 - [ARM][LowOverheadLoops] Iteration count liveness
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 1 02:14:15 PDT 2020
Author: Sam Parker
Date: 2020-10-01T10:11:10+01:00
New Revision: 6ec5f324973dfbe7b4a489593dde5073ff63ff64
URL: https://github.com/llvm/llvm-project/commit/6ec5f324973dfbe7b4a489593dde5073ff63ff64
DIFF: https://github.com/llvm/llvm-project/commit/6ec5f324973dfbe7b4a489593dde5073ff63ff64.diff
LOG: [ARM][LowOverheadLoops] Iteration count liveness
Before deciding to insert a [W|D]LSTP, check that defining LR with
the element count won't affect any other instructions that should be
taking the iteration count.
Differential Revision: https://reviews.llvm.org/D88549
Added:
Modified:
llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index fe28470c99ec..cd9c38752ad2 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -611,6 +611,23 @@ bool LowOverheadLoop::ValidateTailPredicate() {
return false;
}
+ // Check that creating a [W|D]LSTP, which will define LR with an element
+ // count instead of iteration count, won't affect any other instructions
+ // than the LoopStart and LoopDec.
+ // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
+ if (StartInsertPt == Start && Start->getOperand(0).getReg() == ARM::LR) {
+ if (auto *IterCount = RDA.getMIOperand(Start, 0)) {
+ SmallPtrSet<MachineInstr *, 2> Uses;
+ RDA.getGlobalUses(IterCount, ARM::LR, Uses);
+ for (auto *Use : Uses) {
+ if (Use != Start && Use != Dec) {
+ LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
+ return false;
+ }
+ }
+ }
+ }
+
// For tail predication, we need to provide the number of elements, instead
// of the iteration count, to the loop start instruction. The number of
// elements is provided to the vctp instruction, so we need to check that
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
index c5713c8224b5..c2de31ddef1f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
@@ -142,18 +142,22 @@ body: |
; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
- ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
+ ; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+ ; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK: liveins: $r0, $r1, $r2
+ ; CHECK: liveins: $r0, $r1, $r2, $r12
; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
+ ; CHECK: renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
- ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
- ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1
+ ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
+ ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.do.end:
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
@@ -242,19 +246,23 @@ body: |
; CHECK: renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
- ; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
+ ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
; CHECK: $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK: liveins: $r0, $r1, $r2
+ ; CHECK: liveins: $r0, $r1, $r2, $r12
; CHECK: $lr = tMOVr $r2, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
; CHECK: renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
+ ; CHECK: renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
; CHECK: renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
- ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
- ; CHECK: dead $lr = MVE_LETP killed renamable $lr, %bb.1
+ ; CHECK: MVE_VPST 8, implicit $vpr
+ ; CHECK: renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
+ ; CHECK: dead $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.do.end:
; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
bb.0.entry:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index b71c2dd7aaa0..94e3e26c819d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -160,17 +160,20 @@ body: |
; CHECK: renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
- ; CHECK: dead renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
- ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
+ ; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body.i:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
- ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12
- ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
- ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
- ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
+ ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12
+ ; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
+ ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
+ ; CHECK: MVE_VPST 4, implicit $vpr
+ ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
+ ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0
+ ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.arm_mean_f32_mve.exit:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index 0e182ece4a75..1404075dce90 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -17,13 +17,16 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
-; CHECK-NEXT: dlstp.32 lr, r3
+; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: mov r4, lr
; CHECK-NEXT: .LBB0_1: @ %do.body.i
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r12], #16
-; CHECK-NEXT: vadd.f32 q0, q0, q1
-; CHECK-NEXT: letp lr, .LBB0_1
+; CHECK-NEXT: vctp.32 r3
+; CHECK-NEXT: subs r3, #4
+; CHECK-NEXT: vpstt
+; CHECK-NEXT: vldrwt.u32 q1, [r12], #16
+; CHECK-NEXT: vaddt.f32 q0, q0, q1
+; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: vadd.f32 s0, s3, s3
More information about the llvm-commits
mailing list