[llvm] bf916cd - [ARM] Add tests for code that spills in tail predicate loops.

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 7 03:35:08 PDT 2021


Author: David Green
Date: 2021-10-07T11:35:02+01:00
New Revision: bf916cdbd25db138baa7dd9a1a6bfe40005c481c

URL: https://github.com/llvm/llvm-project/commit/bf916cdbd25db138baa7dd9a1a6bfe40005c481c
DIFF: https://github.com/llvm/llvm-project/commit/bf916cdbd25db138baa7dd9a1a6bfe40005c481c.diff

LOG: [ARM] Add tests for code that spills in tail predicate loops.

Added: 
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
new file mode 100644
index 0000000000000..c4c69fcaf05b9
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
@@ -0,0 +1,375 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s
+
+%struct.arm_2d_size_t = type { i16, i16 }
+define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
+; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    sub sp, #64
+; CHECK-NEXT:    ldrsh.w r12, [r2, #2]
+; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    itt ge
+; CHECK-NEXT:    ldrshge.w r7, [r2]
+; CHECK-NEXT:    cmpge r7, #1
+; CHECK-NEXT:    blt.w .LBB0_5
+; CHECK-NEXT:  @ %bb.1: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT:    movs r2, #252
+; CHECK-NEXT:    ldr r4, [sp, #152]
+; CHECK-NEXT:    and.w r6, r2, r3, lsr #3
+; CHECK-NEXT:    movs r2, #120
+; CHECK-NEXT:    and.w r5, r2, r3, lsr #9
+; CHECK-NEXT:    lsls r3, r3, #3
+; CHECK-NEXT:    uxtb r3, r3
+; CHECK-NEXT:    muls r6, r4, r6
+; CHECK-NEXT:    rsb.w r2, r4, #256
+; CHECK-NEXT:    vmov.i16 q2, #0xfc
+; CHECK-NEXT:    mul lr, r5, r4
+; CHECK-NEXT:    vdup.16 q4, r6
+; CHECK-NEXT:    mov.w r6, #2016
+; CHECK-NEXT:    vmov.i16 q6, #0xf8
+; CHECK-NEXT:    mul r5, r3, r4
+; CHECK-NEXT:    adds r3, r7, #7
+; CHECK-NEXT:    bic r3, r3, #7
+; CHECK-NEXT:    vdup.16 q3, lr
+; CHECK-NEXT:    subs r3, #8
+; CHECK-NEXT:    movs r4, #1
+; CHECK-NEXT:    vdup.16 q0, r5
+; CHECK-NEXT:    lsls r1, r1, #1
+; CHECK-NEXT:    add.w r3, r4, r3, lsr #3
+; CHECK-NEXT:    vstrw.32 q0, [sp, #48] @ 16-byte Spill
+; CHECK-NEXT:    vmov.i16 q0, #0xf800
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vdup.16 q5, r6
+; CHECK-NEXT:    vmov.i16 q7, #0x78
+; CHECK-NEXT:    vstrw.32 q0, [sp] @ 16-byte Spill
+; CHECK-NEXT:    vstrw.32 q2, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT:    vstrw.32 q3, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT:  .LBB0_2: @ %vector.ph
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB0_3 Depth 2
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r6, r7
+; CHECK-NEXT:    dls lr, r3
+; CHECK-NEXT:  .LBB0_3: @ %vector.body
+; CHECK-NEXT:    @ Parent Loop BB0_2 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vctp.16 r6
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vldrht.u16 q0, [r5]
+; CHECK-NEXT:    vshr.u16 q1, q0, #3
+; CHECK-NEXT:    subs r6, #8
+; CHECK-NEXT:    vand q1, q1, q2
+; CHECK-NEXT:    vmov q2, q4
+; CHECK-NEXT:    vmla.u16 q2, q1, r2
+; CHECK-NEXT:    vshr.u16 q1, q2, #5
+; CHECK-NEXT:    vshl.i16 q2, q0, #3
+; CHECK-NEXT:    vand q3, q1, q5
+; CHECK-NEXT:    vmov q1, q7
+; CHECK-NEXT:    vand q2, q2, q6
+; CHECK-NEXT:    vmov q7, q6
+; CHECK-NEXT:    vmov q6, q5
+; CHECK-NEXT:    vmov q5, q4
+; CHECK-NEXT:    vldrw.u32 q4, [sp, #48] @ 16-byte Reload
+; CHECK-NEXT:    vshr.u16 q0, q0, #9
+; CHECK-NEXT:    vmla.u16 q4, q2, r2
+; CHECK-NEXT:    vshr.u16 q2, q4, #11
+; CHECK-NEXT:    vmov q4, q5
+; CHECK-NEXT:    vmov q5, q6
+; CHECK-NEXT:    vmov q6, q7
+; CHECK-NEXT:    vmov q7, q1
+; CHECK-NEXT:    vorr q1, q3, q2
+; CHECK-NEXT:    vldrw.u32 q2, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT:    vand q0, q0, q7
+; CHECK-NEXT:    vmla.u16 q2, q0, r2
+; CHECK-NEXT:    vldrw.u32 q0, [sp] @ 16-byte Reload
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vldrw.u32 q2, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vstrht.16 q0, [r5], #16
+; CHECK-NEXT:    le lr, .LBB0_3
+; CHECK-NEXT:  @ %bb.4: @ %for.cond3.for.cond.cleanup7_crit_edge.us
+; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    adds r4, #1
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    cmp r4, r12
+; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:  .LBB0_5: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #64
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry: ; Spill test input: the expected output above reloads spilled q registers inside a dls/le loop that is predicated with vctp/vpst.
+  %iHeight = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 1 ; address of struct field 1
+  %0 = load i16, i16* %iHeight, align 2 ; row count of the outer loop (see %exitcond66.not)
+  %conv1 = sext i16 %0 to i32
+  %and.i = shl i16 %hwColour, 3
+  %shl.i = and i16 %and.i, 248 ; colour field A = (hwColour << 3) & 0xf8
+  %1 = lshr i16 %hwColour, 9
+  %shl4.i = and i16 %1, 120 ; colour field B = (hwColour >> 9) & 0x78
+  %2 = lshr i16 %hwColour, 3
+  %3 = and i16 %2, 252 ; colour field C = (hwColour >> 3) & 0xfc
+  %4 = trunc i32 %chRatio to i16 ; ratio, truncated to 16 bits
+  %5 = sub i16 256, %4 ; 256 - ratio: the weight applied to the loaded pixels
+  %conv30 = sext i16 %iTargetStride to i32 ; row stride, used as an i16 element offset below
+  %cmp61 = icmp sgt i16 %0, 0 ; nothing to do unless field 1 is positive
+  br i1 %cmp61, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup
+
+for.cond3.preheader.lr.ph:                        ; preds = %entry
+  %iWidth = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 0 ; address of struct field 0
+  %6 = load i16, i16* %iWidth, align 2 ; element count of each row (bound of the lane mask)
+  %conv4 = sext i16 %6 to i32
+  %cmp558 = icmp sgt i16 %6, 0 ; nothing to do unless field 0 is positive
+  br i1 %cmp558, label %for.cond3.preheader.us.preheader, label %for.cond.cleanup
+
+for.cond3.preheader.us.preheader:                 ; preds = %for.cond3.preheader.lr.ph
+  %conv15.us = mul i16 %shl.i, %4 ; loop-invariant: field A * ratio
+  %conv15.us.1 = mul i16 %3, %4 ; loop-invariant: field C * ratio
+  %conv15.us.2 = mul i16 %shl4.i, %4 ; loop-invariant: field B * ratio
+  %n.rnd.up = add nsw i32 %conv4, 7
+  %n.vec = and i32 %n.rnd.up, -8 ; row length rounded up to a multiple of 8 lanes
+  %broadcast.splatinsert75 = insertelement <8 x i16> poison, i16 %5, i32 0
+  %broadcast.splat76 = shufflevector <8 x i16> %broadcast.splatinsert75, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(256 - ratio)
+  %broadcast.splatinsert77 = insertelement <8 x i16> poison, i16 %conv15.us, i32 0
+  %broadcast.splat78 = shufflevector <8 x i16> %broadcast.splatinsert77, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(field A * ratio)
+  %broadcast.splatinsert79 = insertelement <8 x i16> poison, i16 %conv15.us.1, i32 0
+  %broadcast.splat80 = shufflevector <8 x i16> %broadcast.splatinsert79, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(field C * ratio)
+  %broadcast.splatinsert81 = insertelement <8 x i16> poison, i16 %conv15.us.2, i32 0
+  %broadcast.splat82 = shufflevector <8 x i16> %broadcast.splatinsert81, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(field B * ratio)
+  br label %vector.ph
+
+vector.ph:                                        ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.us.preheader
+  %phwTargetBase.addr.063.us = phi i16* [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ] ; start of the current row
+  %y.062.us = phi i32 [ %inc32.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ] ; outer-loop (row) counter
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element index within the row, stepped by 8
+  %next.gep = getelementptr i16, i16* %phwTargetBase.addr.063.us, i32 %index
+  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %conv4) ; lane i is active while %index + i < %conv4
+  %7 = bitcast i16* %next.gep to <8 x i16>*
+  %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %7, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison) ; 8 halfwords, inactive lanes are poison
+  %8 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %9 = and <8 x i16> %8, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248> ; pixel field A = (px << 3) & 0xf8
+  %10 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+  %11 = and <8 x i16> %10, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120> ; pixel field B = (px >> 9) & 0x78
+  %12 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %13 = and <8 x i16> %12, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252> ; pixel field C = (px >> 3) & 0xfc
+  %14 = mul <8 x i16> %9, %broadcast.splat76
+  %15 = add <8 x i16> %14, %broadcast.splat78 ; A: pixel * (256 - ratio) + colour * ratio
+  %16 = lshr <8 x i16> %15, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; blended A lands in bits 0..4
+  %17 = mul <8 x i16> %13, %broadcast.splat76
+  %18 = add <8 x i16> %17, %broadcast.splat80 ; C: pixel * (256 - ratio) + colour * ratio
+  %19 = mul <8 x i16> %11, %broadcast.splat76
+  %20 = add <8 x i16> %19, %broadcast.splat82 ; B: pixel * (256 - ratio) + colour * ratio
+  %21 = lshr <8 x i16> %18, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  %22 = and <8 x i16> %21, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016> ; blended C kept in bits 5..10 (mask 0x07e0)
+  %23 = or <8 x i16> %22, %16
+  %24 = and <8 x i16> %20, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048> ; blended B kept in bits 11..15 (mask 0xf800)
+  %25 = or <8 x i16> %23, %24 ; recombined 16-bit result
+  %26 = bitcast i16* %next.gep to <8 x i16>*
+  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %25, <8 x i16>* %26, i32 2, <8 x i1> %active.lane.mask) ; written back to the address it was loaded from
+  %index.next = add i32 %index, 8
+  %27 = icmp eq i32 %index.next, %n.vec ; row finished once the rounded-up length is reached
+  br i1 %27, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body
+
+for.cond3.for.cond.cleanup7_crit_edge.us:         ; preds = %vector.body
+  %add.ptr.us = getelementptr inbounds i16, i16* %phwTargetBase.addr.063.us, i32 %conv30 ; advance the row pointer by the stride (in i16 elements)
+  %inc32.us = add nuw nsw i32 %y.062.us, 1
+  %exitcond66.not = icmp eq i32 %inc32.us, %conv1 ; done after %conv1 rows
+  br i1 %exitcond66.not, label %for.cond.cleanup, label %vector.ph
+
+for.cond.cleanup:                                 ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.lr.ph, %entry
+  ret void
+}
+define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
+; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    sub sp, #64
+; CHECK-NEXT:    ldrsh.w r12, [r2, #2]
+; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    blt.w .LBB1_6
+; CHECK-NEXT:  @ %bb.1: @ %for.cond3.preheader.lr.ph
+; CHECK-NEXT:    ldrsh.w r2, [r2]
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt.w .LBB1_6
+; CHECK-NEXT:  @ %bb.2: @ %for.cond3.preheader.us.preheader
+; CHECK-NEXT:    ldr r7, [sp, #152]
+; CHECK-NEXT:    movs r4, #252
+; CHECK-NEXT:    lsls r6, r3, #3
+; CHECK-NEXT:    and.w r4, r4, r3, lsr #3
+; CHECK-NEXT:    uxtb r6, r6
+; CHECK-NEXT:    movs r5, #120
+; CHECK-NEXT:    mul lr, r4, r7
+; CHECK-NEXT:    adds r4, r2, #7
+; CHECK-NEXT:    and.w r5, r5, r3, lsr #9
+; CHECK-NEXT:    muls r6, r7, r6
+; CHECK-NEXT:    bic r4, r4, #7
+; CHECK-NEXT:    vmov.i16 q0, #0x78
+; CHECK-NEXT:    rsb.w r3, r7, #256
+; CHECK-NEXT:    muls r5, r7, r5
+; CHECK-NEXT:    lsls r7, r1, #1
+; CHECK-NEXT:    sub.w r1, r4, #8
+; CHECK-NEXT:    movs r4, #1
+; CHECK-NEXT:    vstrw.32 q0, [sp, #48] @ 16-byte Spill
+; CHECK-NEXT:    vdup.16 q4, r6
+; CHECK-NEXT:    mov.w r6, #2016
+; CHECK-NEXT:    vdup.16 q0, lr
+; CHECK-NEXT:    add.w r1, r4, r1, lsr #3
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    vmov.i16 q2, #0xf8
+; CHECK-NEXT:    vmov.i16 q5, #0xfc
+; CHECK-NEXT:    vstrw.32 q0, [sp, #32] @ 16-byte Spill
+; CHECK-NEXT:    vdup.16 q0, r5
+; CHECK-NEXT:    vdup.16 q6, r6
+; CHECK-NEXT:    vmov.i16 q3, #0xf800
+; CHECK-NEXT:    vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT:    vstrw.32 q3, [sp] @ 16-byte Spill
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  .LBB1_3: @ %vector.ph
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB1_4 Depth 2
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    dls lr, r1
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  .LBB1_4: @ %vector.body
+; CHECK-NEXT:    @ Parent Loop BB1_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vctp.16 r6
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vldrht.u16 q0, [r5]
+; CHECK-NEXT:    vshl.i16 q1, q0, #3
+; CHECK-NEXT:    vand q1, q1, q2
+; CHECK-NEXT:    vmov q3, q2
+; CHECK-NEXT:    vmov q2, q4
+; CHECK-NEXT:    vmla.u16 q2, q1, r3
+; CHECK-NEXT:    vshr.u16 q1, q0, #3
+; CHECK-NEXT:    vand q1, q1, q5
+; CHECK-NEXT:    vmov q7, q5
+; CHECK-NEXT:    vmov q5, q4
+; CHECK-NEXT:    vldrw.u32 q4, [sp, #32] @ 16-byte Reload
+; CHECK-NEXT:    vshr.u16 q0, q0, #9
+; CHECK-NEXT:    vmla.u16 q4, q1, r3
+; CHECK-NEXT:    vldrw.u32 q1, [sp, #48] @ 16-byte Reload
+; CHECK-NEXT:    subs r6, #8
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vldrw.u32 q1, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT:    vmla.u16 q1, q0, r3
+; CHECK-NEXT:    vshr.u16 q0, q2, #11
+; CHECK-NEXT:    vshr.u16 q2, q4, #5
+; CHECK-NEXT:    vand q2, q2, q6
+; CHECK-NEXT:    vorr q0, q2, q0
+; CHECK-NEXT:    vmov q2, q3
+; CHECK-NEXT:    vldrw.u32 q3, [sp] @ 16-byte Reload
+; CHECK-NEXT:    vmov q4, q5
+; CHECK-NEXT:    vmov q5, q7
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vstrht.16 q0, [r5], #16
+; CHECK-NEXT:    le lr, .LBB1_4
+; CHECK-NEXT:  @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
+; CHECK-NEXT:    @ in Loop: Header=BB1_3 Depth=1
+; CHECK-NEXT:    adds r4, #1
+; CHECK-NEXT:    cmp r4, r12
+; CHECK-NEXT:    add r0, r7
+; CHECK-NEXT:    bne .LBB1_3
+; CHECK-NEXT:  .LBB1_6: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #64
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry: ; Same blend loop compiled with "target-cpu"="cortex-m55"; the expected output above still reloads spilled q registers inside the vctp/vpst-predicated dls/le loop.
+  %iHeight = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 1 ; address of struct field 1
+  %0 = load i16, i16* %iHeight, align 2 ; row count of the outer loop (see %exitcond66.not)
+  %conv1 = sext i16 %0 to i32
+  %and.i = shl i16 %hwColour, 3
+  %shl.i = and i16 %and.i, 248 ; colour field A = (hwColour << 3) & 0xf8
+  %1 = lshr i16 %hwColour, 9
+  %shl4.i = and i16 %1, 120 ; colour field B = (hwColour >> 9) & 0x78
+  %2 = lshr i16 %hwColour, 3
+  %3 = and i16 %2, 252 ; colour field C = (hwColour >> 3) & 0xfc
+  %4 = trunc i32 %chRatio to i16 ; ratio, truncated to 16 bits
+  %5 = sub i16 256, %4 ; 256 - ratio: the weight applied to the loaded pixels
+  %conv30 = sext i16 %iTargetStride to i32 ; row stride, used as an i16 element offset below
+  %cmp61 = icmp sgt i16 %0, 0 ; nothing to do unless field 1 is positive
+  br i1 %cmp61, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup
+
+for.cond3.preheader.lr.ph:                        ; preds = %entry
+  %iWidth = getelementptr inbounds %struct.arm_2d_size_t, %struct.arm_2d_size_t* %ptCopySize, i32 0, i32 0 ; address of struct field 0
+  %6 = load i16, i16* %iWidth, align 2 ; element count of each row (bound of the lane mask)
+  %conv4 = sext i16 %6 to i32
+  %cmp558 = icmp sgt i16 %6, 0 ; nothing to do unless field 0 is positive
+  br i1 %cmp558, label %for.cond3.preheader.us.preheader, label %for.cond.cleanup
+
+for.cond3.preheader.us.preheader:                 ; preds = %for.cond3.preheader.lr.ph
+  %conv15.us = mul i16 %shl.i, %4 ; loop-invariant: field A * ratio
+  %conv15.us.1 = mul i16 %3, %4 ; loop-invariant: field C * ratio
+  %conv15.us.2 = mul i16 %shl4.i, %4 ; loop-invariant: field B * ratio
+  %n.rnd.up = add nsw i32 %conv4, 7
+  %n.vec = and i32 %n.rnd.up, -8 ; row length rounded up to a multiple of 8 lanes
+  %broadcast.splatinsert75 = insertelement <8 x i16> poison, i16 %5, i32 0
+  %broadcast.splat76 = shufflevector <8 x i16> %broadcast.splatinsert75, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(256 - ratio)
+  %broadcast.splatinsert77 = insertelement <8 x i16> poison, i16 %conv15.us, i32 0
+  %broadcast.splat78 = shufflevector <8 x i16> %broadcast.splatinsert77, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(field A * ratio)
+  %broadcast.splatinsert79 = insertelement <8 x i16> poison, i16 %conv15.us.1, i32 0
+  %broadcast.splat80 = shufflevector <8 x i16> %broadcast.splatinsert79, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(field C * ratio)
+  %broadcast.splatinsert81 = insertelement <8 x i16> poison, i16 %conv15.us.2, i32 0
+  %broadcast.splat82 = shufflevector <8 x i16> %broadcast.splatinsert81, <8 x i16> poison, <8 x i32> zeroinitializer ; splat(field B * ratio)
+  br label %vector.ph
+
+vector.ph:                                        ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.us.preheader
+  %phwTargetBase.addr.063.us = phi i16* [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ] ; start of the current row
+  %y.062.us = phi i32 [ %inc32.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ] ; outer-loop (row) counter
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; element index within the row, stepped by 8
+  %next.gep = getelementptr i16, i16* %phwTargetBase.addr.063.us, i32 %index
+  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %conv4) ; lane i is active while %index + i < %conv4
+  %7 = bitcast i16* %next.gep to <8 x i16>*
+  %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %7, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison) ; 8 halfwords, inactive lanes are poison
+  %8 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %9 = and <8 x i16> %8, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248> ; pixel field A = (px << 3) & 0xf8
+  %10 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+  %11 = and <8 x i16> %10, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120> ; pixel field B = (px >> 9) & 0x78
+  %12 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %13 = and <8 x i16> %12, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252> ; pixel field C = (px >> 3) & 0xfc
+  %14 = mul <8 x i16> %9, %broadcast.splat76
+  %15 = add <8 x i16> %14, %broadcast.splat78 ; A: pixel * (256 - ratio) + colour * ratio
+  %16 = lshr <8 x i16> %15, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> ; blended A lands in bits 0..4
+  %17 = mul <8 x i16> %13, %broadcast.splat76
+  %18 = add <8 x i16> %17, %broadcast.splat80 ; C: pixel * (256 - ratio) + colour * ratio
+  %19 = mul <8 x i16> %11, %broadcast.splat76
+  %20 = add <8 x i16> %19, %broadcast.splat82 ; B: pixel * (256 - ratio) + colour * ratio
+  %21 = lshr <8 x i16> %18, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  %22 = and <8 x i16> %21, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016> ; blended C kept in bits 5..10 (mask 0x07e0)
+  %23 = or <8 x i16> %22, %16
+  %24 = and <8 x i16> %20, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048> ; blended B kept in bits 11..15 (mask 0xf800)
+  %25 = or <8 x i16> %23, %24 ; recombined 16-bit result
+  %26 = bitcast i16* %next.gep to <8 x i16>*
+  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %25, <8 x i16>* %26, i32 2, <8 x i1> %active.lane.mask) ; written back to the address it was loaded from
+  %index.next = add i32 %index, 8
+  %27 = icmp eq i32 %index.next, %n.vec ; row finished once the rounded-up length is reached
+  br i1 %27, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body
+
+for.cond3.for.cond.cleanup7_crit_edge.us:         ; preds = %vector.body
+  %add.ptr.us = getelementptr inbounds i16, i16* %phwTargetBase.addr.063.us, i32 %conv30 ; advance the row pointer by the stride (in i16 elements)
+  %inc32.us = add nuw nsw i32 %y.062.us, 1
+  %exitcond66.not = icmp eq i32 %inc32.us, %conv1 ; done after %conv1 rows
+  br i1 %exitcond66.not, label %for.cond.cleanup, label %vector.ph
+
+for.cond.cleanup:                                 ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.lr.ph, %entry
+  ret void
+}
+
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) ; lane predicate used by both vector.body loops
+declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>) ; operands: pointer, alignment, mask, passthru
+declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>) ; operands: value, pointer, alignment, mask

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir
new file mode 100644
index 0000000000000..14eb0e1f57752
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir
@@ -0,0 +1,425 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv8.1m.main-none-unknown-eabi -mattr=+mve -run-pass=arm-low-overhead-loops -arm-enable-subreg-liveness %s -o - --verify-machineinstrs | FileCheck %s
+
+--- |
+  %struct.arm_2d_size_t = type { i16, i16 }
+  define void @none(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
+    unreachable ; IR placeholder only; the machine code under test is the MIR function named 'none' below
+  }
+  define void @copyin(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
+    unreachable ; IR placeholder only; the machine code under test is the MIR function named 'copyin' below
+  }
+  define void @copyout(i16* noalias nocapture %phwTargetBase, i16 signext %iTargetStride, %struct.arm_2d_size_t* noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
+    unreachable ; IR placeholder only; presumably paired with a MIR function named 'copyout' later in this file
+  }
+...
+---
+name:            none # nested-loop input; the expected output below turns the inner loop into MVE_DLSTP_16 / MVE_LETP
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+body:             |
+  ; CHECK-LABEL: name: none
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.5(0x5c0b8170), %bb.1(0x23f47e90)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r4, $r5, $r6, $lr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $lr, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r6, -8
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r5, -12
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r4, -16
+  ; CHECK-NEXT:   renamable $r12 = t2LDRSHi12 renamable $r2, 2, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   t2IT 10, 4, implicit-def $itstate
+  ; CHECK-NEXT:   renamable $r2 = t2LDRSHi12 killed renamable $r2, 0, 10 /* CC::ge */, $cpsr, implicit $r2, implicit $itstate
+  ; CHECK-NEXT:   tCMPi8 renamable $r2, 1, 10 /* CC::ge */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+  ; CHECK-NEXT:   tBcc %bb.5, 11 /* CC::lt */, killed $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $lr = MVE_DLSTP_16 renamable $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+  ; CHECK-NEXT:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 0, $noreg, renamable $lr
+  ; CHECK-NEXT:   renamable $q1 = MVE_VAND killed renamable $q1, renamable $q0, 0, $noreg, renamable $lr, undef renamable $q1
+  ; CHECK-NEXT:   renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 0, killed $noreg, renamable $lr
+  ; CHECK-NEXT:   $lr = MVE_LETP killed renamable $lr, %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   tCMPhir renamable $r3, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   tBcc %bb.2, 1 /* CC::ne */, killed $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $pc
+  bb.0:
+    successors: %bb.5(0x80000000), %bb.1(0x32000000)
+    liveins: $r0, $r1, $r2, $r4, $r5, $r6, $lr
+
+    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r6, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    renamable $r12 = t2LDRSHi12 renamable $r2, 2, 14 /* CC::al */, $noreg ; r12 = halfword at [r2 + 2], the outer-loop bound compared in bb.4
+    t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2IT 10, 4, implicit-def $itstate
+    renamable $r2 = t2LDRSHi12 killed renamable $r2, 0, 10 /* CC::ge */, $cpsr, implicit $r2, implicit $itstate ; r2 = halfword at [r2], the element count of the inner loop
+    tCMPi8 renamable $r2, 1, 10 /* CC::ge */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+    tBcc %bb.5, 11 /* CC::lt */, killed $cpsr ; NOTE(review): this block's successor weights sum to more than 0x80000000; the expected output above lists renormalised values
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    liveins: $r0, $r1, $r2, $r12
+
+    renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
+    renamable $r6, dead $cpsr = nsw tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg ; start of the iteration-count computation (r2 + 7); absent from the expected output
+    renamable $r5, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+    renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
+    renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
+    renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 ; q0 = loop-invariant vector ANDed with every loaded vector in bb.3
+    renamable $r3 = t2BICri killed renamable $r6, 7, 14 /* CC::al */, $noreg, $noreg
+    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
+    renamable $r6 = nuw nsw t2ADDrs killed renamable $r5, killed renamable $r3, 27, 14 /* CC::al */, $noreg, $noreg ; r6 = iteration count handed to t2DoLoopStartTP in bb.2
+    renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg ; r3 = outer-loop counter, incremented in bb.4
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+    liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+
+    $r4 = tMOVr $r0, 14 /* CC::al */, $noreg ; r4 = working pointer for this pass of the inner loop
+    $r5 = tMOVr $r2, 14 /* CC::al */, $noreg ; r5 = remaining element count consumed by MVE_VCTP16; absent from the expected output
+    renamable $lr = t2DoLoopStartTP renamable $r6, renamable $r2 ; operands: iteration count, element count
+
+  bb.3:
+    successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r12
+
+    renamable $vpr = MVE_VCTP16 renamable $r5, 0, $noreg, $noreg ; predicate built from the remaining element count
+    renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg ; 8 halfword lanes handled per iteration
+    MVE_VPST 8, implicit $vpr
+    renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr ; predicated load; the expected output has it unpredicated
+    renamable $q1 = MVE_VAND killed renamable $q1, renamable $q0, 0, $noreg, renamable $lr, undef renamable $q1 ; no $vpr operand on this instruction
+    MVE_VPST 8, implicit $vpr
+    renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr ; predicated store, then r4 += 16 bytes
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr ; becomes MVE_LETP in the expected output
+    tB %bb.4, 14 /* CC::al */, $noreg
+
+  bb.4:
+    successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+    liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+
+    renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+    renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg ; advance the base pointer by r1 (set by the tLSLri in bb.1)
+    tCMPhir renamable $r3, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    tBcc %bb.2, 1 /* CC::ne */, killed $cpsr ; repeat the outer loop until r3 == r12
+
+  bb.5:
+    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $pc
+
+...
+---
+name:            copyin
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+body:             |
+  ; CHECK-LABEL: name: copyin
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.5(0x5c0b8170), %bb.1(0x23f47e90)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r4, $r5, $r6, $lr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $lr, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r6, -8
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r5, -12
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r4, -16
+  ; CHECK-NEXT:   renamable $r12 = t2LDRSHi12 renamable $r2, 2, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   t2IT 10, 4, implicit-def $itstate
+  ; CHECK-NEXT:   renamable $r2 = t2LDRSHi12 killed renamable $r2, 0, 10 /* CC::ge */, $cpsr, implicit $r2, implicit $itstate
+  ; CHECK-NEXT:   tCMPi8 renamable $r2, 1, 10 /* CC::ge */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+  ; CHECK-NEXT:   tBcc %bb.5, 11 /* CC::lt */, killed $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $lr = MVE_DLSTP_16 renamable $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+  ; CHECK-NEXT:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 0, $noreg, renamable $lr
+  ; CHECK-NEXT:   $q2 = MVE_VORR $q0, $q0, 0, $noreg, renamable $lr, undef renamable $q2
+  ; CHECK-NEXT:   renamable $q1 = MVE_VAND killed renamable $q1, killed renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
+  ; CHECK-NEXT:   renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 0, killed $noreg, renamable $lr
+  ; CHECK-NEXT:   $lr = MVE_LETP killed renamable $lr, %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   tCMPhir renamable $r3, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   tBcc %bb.2, 1 /* CC::ne */, killed $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $pc
+  bb.0:
+    successors: %bb.5(0x80000000), %bb.1(0x32000000)
+    liveins: $r0, $r1, $r2, $r4, $r5, $r6, $lr
+
+    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r6, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    renamable $r12 = t2LDRSHi12 renamable $r2, 2, 14 /* CC::al */, $noreg
+    t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2IT 10, 4, implicit-def $itstate
+    renamable $r2 = t2LDRSHi12 killed renamable $r2, 0, 10 /* CC::ge */, $cpsr, implicit $r2, implicit $itstate
+    tCMPi8 renamable $r2, 1, 10 /* CC::ge */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+    tBcc %bb.5, 11 /* CC::lt */, killed $cpsr
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    liveins: $r0, $r1, $r2, $r12
+
+    renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
+    renamable $r6, dead $cpsr = nsw tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg
+    renamable $r5, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+    renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
+    renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
+    renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0
+    renamable $r3 = t2BICri killed renamable $r6, 7, 14 /* CC::al */, $noreg, $noreg
+    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
+    renamable $r6 = nuw nsw t2ADDrs killed renamable $r5, killed renamable $r3, 27, 14 /* CC::al */, $noreg, $noreg
+    renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+    liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+
+    $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+    $r5 = tMOVr $r2, 14 /* CC::al */, $noreg
+    renamable $lr = t2DoLoopStartTP renamable $r6, renamable $r2
+
+  bb.3:
+    successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r12
+
+    renamable $vpr = MVE_VCTP16 renamable $r5, 0, $noreg, $noreg
+    renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg
+    MVE_VPST 8, implicit $vpr
+    renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr
+    $q2 = MVE_VORR $q0, $q0, 0, $noreg, renamable $lr, undef renamable $q2
+    renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
+    MVE_VPST 8, implicit $vpr
+    renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr
+    tB %bb.4, 14 /* CC::al */, $noreg
+
+  bb.4:
+    successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+    liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+
+    renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+    renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg
+    tCMPhir renamable $r3, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    tBcc %bb.2, 1 /* CC::ne */, killed $cpsr
+
+  bb.5:
+    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $pc
+
+...
+---
+name:            copyout
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+body:             |
+  ; CHECK-LABEL: name: copyout
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.5(0x5c0b8170), %bb.1(0x23f47e90)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r4, $r5, $r6, $lr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $lr, implicit-def $sp, implicit $sp
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 16
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r6, -8
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r5, -12
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r4, -16
+  ; CHECK-NEXT:   renamable $r12 = t2LDRSHi12 renamable $r2, 2, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   t2IT 10, 4, implicit-def $itstate
+  ; CHECK-NEXT:   renamable $r2 = t2LDRSHi12 killed renamable $r2, 0, 10 /* CC::ge */, $cpsr, implicit $r2, implicit $itstate
+  ; CHECK-NEXT:   tCMPi8 renamable $r2, 1, 10 /* CC::ge */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+  ; CHECK-NEXT:   tBcc %bb.5, 11 /* CC::lt */, killed $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r6, dead $cpsr = nsw tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r5, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0
+  ; CHECK-NEXT:   renamable $r3 = t2BICri killed renamable $r6, 7, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r6 = nuw nsw t2ADDrs killed renamable $r5, killed renamable $r3, 27, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $r5 = tMOVr $r2, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $lr = t2DLS renamable $r6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+  ; CHECK-NEXT:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $vpr = MVE_VCTP16 renamable $r5, 0, $noreg, $noreg
+  ; CHECK-NEXT:   renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
+  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr
+  ; CHECK-NEXT:   $q2 = MVE_VORR killed $q0, killed $q0, 0, $noreg, renamable $lr, undef renamable $q2
+  ; CHECK-NEXT:   renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
+  ; CHECK-NEXT:   $q0 = MVE_VORR killed $q2, killed $q2, 0, $noreg, renamable $lr, undef renamable $q0
+  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
+  ; CHECK-NEXT:   renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr
+  ; CHECK-NEXT:   $lr = t2LEUpdate killed renamable $lr, %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+  ; CHECK-NEXT:   liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   tCMPhir renamable $r3, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   tBcc %bb.2, 1 /* CC::ne */, killed $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $pc
+  bb.0: ; entry: save registers, then leave via bb.5 unless both halfwords loaded through r2 are >= 1
+    successors: %bb.5(0x80000000), %bb.1(0x32000000) ; weights as written; the expected output above prints 0x5c0b8170 / 0x23f47e90 for the same edges (presumably rescaled because these sum past 0x80000000 - confirm)
+    liveins: $r0, $r1, $r2, $r4, $r5, $r6, $lr
+
+    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r6, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    renamable $r12 = t2LDRSHi12 renamable $r2, 2, 14 /* CC::al */, $noreg ; r12 = signed halfword at [r2, #2]; compared against r3 in bb.4
+    t2CMPri renamable $r12, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2IT 10, 4, implicit-def $itstate ; the next two instructions carry the ge condition (10)
+    renamable $r2 = t2LDRSHi12 killed renamable $r2, 0, 10 /* CC::ge */, $cpsr, implicit $r2, implicit $itstate ; r2 is overwritten with the signed halfword at [r2, #0]
+    tCMPi8 renamable $r2, 1, 10 /* CC::ge */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+    tBcc %bb.5, 11 /* CC::lt */, killed $cpsr
+
+  bb.1: ; preheader: defines q0, doubles r1, builds r6 (the first operand of t2DoLoopStartTP in bb.2) and zeroes the outer counter r3
+    successors: %bb.2(0x80000000)
+    liveins: $r0, $r1, $r2, $r12
+
+    renamable $r3 = t2LDRHi12 $sp, 16, 14 /* CC::al */, $noreg
+    renamable $r6, dead $cpsr = nsw tADDi3 renamable $r2, 7, 14 /* CC::al */, $noreg
+    renamable $r5, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+    renamable $r1, dead $cpsr = nsw tLSLri killed renamable $r1, 1, 14 /* CC::al */, $noreg
+    renamable $r3 = t2RSBri killed renamable $r3, 256, 14 /* CC::al */, $noreg, $noreg
+    renamable $q0 = MVE_VDUP16 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q0 ; the only definition of q0 outside the inner loop
+    renamable $r3 = t2BICri killed renamable $r6, 7, 14 /* CC::al */, $noreg, $noreg
+    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
+    renamable $r6 = nuw nsw t2ADDrs killed renamable $r5, killed renamable $r3, 27, 14 /* CC::al */, $noreg, $noreg ; r6 = r5 + shifted r3 (shift operand 27, presumably lsr #3 - confirm)
+    renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+
+  bb.2: ; outer-loop header: restarts the inner loop with r4 = r0 and r5 = r2
+    successors: %bb.3(0x80000000)
+    liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+
+    $r4 = tMOVr $r0, 14 /* CC::al */, $noreg
+    $r5 = tMOVr $r2, 14 /* CC::al */, $noreg
+    renamable $lr = t2DoLoopStartTP renamable $r6, renamable $r2 ; loop-start pseudo; the expected output above has "$lr = t2DLS renamable $r6" in its place
+
+  bb.3: ; inner loop: branches back to itself through t2LoopEndDec
+    successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r12
+
+    renamable $vpr = MVE_VCTP16 renamable $r5, 0, $noreg, $noreg ; $vpr is produced from r5, which the next line reduces by 8
+    renamable $r5, dead $cpsr = tSUBi8 killed renamable $r5, 8, 14 /* CC::al */, $noreg
+    MVE_VPST 8, implicit $vpr
+    renamable $q1 = MVE_VLDRHU16 renamable $r4, 0, 1, renamable $vpr, renamable $lr
+    $q2 = MVE_VORR $q0, $q0, 0, $noreg, renamable $lr, undef renamable $q2 ; both sources are $q0, i.e. a copy of q0 into q2
+    renamable $q1 = MVE_VAND killed renamable $q1, renamable $q2, 0, $noreg, renamable $lr, undef renamable $q1
+    $q0 = MVE_VORR $q2, $q2, 0, $noreg, renamable $lr, undef renamable $q0 ; copy of q2 back into q0, so q0 is written inside the loop body
+    MVE_VPST 8, implicit $vpr
+    renamable $r4 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r4, 16, 1, killed renamable $vpr, renamable $lr
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr ; expected output above has "$lr = t2LEUpdate" here and still contains MVE_VCTP16 and both MVE_VPST
+    tB %bb.4, 14 /* CC::al */, $noreg ; this branch has no counterpart in the expected output above
+
+  bb.4: ; outer-loop latch: r3 += 1, r0 += r1, back to bb.2 while r3 != r12
+    successors: %bb.5(0x04000000), %bb.2(0x7c000000)
+    liveins: $q0, $r0, $r1, $r2, $r3, $r6, $r12
+
+    renamable $r3, dead $cpsr = nuw nsw tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+    renamable $r0 = tADDhirr killed renamable $r0, renamable $r1, 14 /* CC::al */, $noreg
+    tCMPhir renamable $r3, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    tBcc %bb.2, 1 /* CC::ne */, killed $cpsr
+
+  bb.5: ; exit block, reached from bb.0 and bb.4
+    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $pc
+
+...
+


        


More information about the llvm-commits mailing list