[llvm] 5abbf20 - [ARM] Additional test for Min loop. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 10 02:49:14 PST 2020


Author: David Green
Date: 2020-12-10T10:49:00Z
New Revision: 5abbf20f0fe5a1fed0d455bc682ca20d0eb651f7

URL: https://github.com/llvm/llvm-project/commit/5abbf20f0fe5a1fed0d455bc682ca20d0eb651f7
DIFF: https://github.com/llvm/llvm-project/commit/5abbf20f0fe5a1fed0d455bc682ca20d0eb651f7.diff

LOG: [ARM] Additional test for Min loop. NFC

Added: 
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
new file mode 100644
index 0000000000000..9899417fb4d8e
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define void @arm_min_q31(i32* nocapture readonly %pSrc, i32 %blockSize, i32* nocapture %pResult, i32* nocapture %pIndex) { ; signed-min search over pSrc with index tracking; the IR loop is already unrolled by 4 with an up-to-3-element epilogue
+; CHECK-LABEL: arm_min_q31:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    ldr.w r12, [r0]
+; CHECK-NEXT:    subs.w r9, r1, #1
+; CHECK-NEXT:    beq .LBB0_3
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
+; CHECK-NEXT:    subs r6, r1, #2
+; CHECK-NEXT:    and r7, r9, #3
+; CHECK-NEXT:    cmp r6, #3
+; CHECK-NEXT:    str r7, [sp] @ 4-byte Spill
+; CHECK-NEXT:    bhs .LBB0_4
+; CHECK-NEXT:  @ %bb.2:
+; CHECK-NEXT:    mov.w r8, #0
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    mov.w r8, #0
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_4: @ %while.body.preheader.new
+; CHECK-NEXT:    bic r6, r9, #3
+; CHECK-NEXT:    movs r4, #1
+; CHECK-NEXT:    subs r6, #4
+; CHECK-NEXT:    mov.w r8, #0
+; CHECK-NEXT:    add.w lr, r4, r6, lsr #2
+; CHECK-NEXT:    movs r6, #4
+; CHECK-NEXT:    mov lr, lr
+; CHECK-NEXT:    mov r11, lr
+; CHECK-NEXT:  .LBB0_5: @ %while.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr r10, [r0, #16]!
+; CHECK-NEXT:    mov lr, r11
+; CHECK-NEXT:    sub.w lr, lr, #1
+; CHECK-NEXT:    sub.w r9, r9, #4
+; CHECK-NEXT:    ldrd r7, r5, [r0, #-12]
+; CHECK-NEXT:    mov r11, lr
+; CHECK-NEXT:    ldr r4, [r0, #-4]
+; CHECK-NEXT:    cmp r12, r7
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    subgt.w r8, r6, #3
+; CHECK-NEXT:    csel r7, r7, r12, gt
+; CHECK-NEXT:    cmp r7, r5
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    subgt.w r8, r6, #2
+; CHECK-NEXT:    csel r7, r5, r7, gt
+; CHECK-NEXT:    cmp r7, r4
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    subgt.w r8, r6, #1
+; CHECK-NEXT:    csel r7, r4, r7, gt
+; CHECK-NEXT:    cmp r7, r10
+; CHECK-NEXT:    csel r8, r6, r8, gt
+; CHECK-NEXT:    add.w r6, r6, #4
+; CHECK-NEXT:    csel r12, r10, r7, gt
+; CHECK-NEXT:    cmp.w lr, #0
+; CHECK-NEXT:    bne .LBB0_5
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_6: @ %while.end.loopexit.unr-lcssa
+; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
+; CHECK-NEXT:    cbz r7, .LBB0_10
+; CHECK-NEXT:  @ %bb.7: @ %while.body.epil
+; CHECK-NEXT:    ldr r4, [r0, #4]
+; CHECK-NEXT:    sub.w r1, r1, r9
+; CHECK-NEXT:    cmp r12, r4
+; CHECK-NEXT:    csel r8, r1, r8, gt
+; CHECK-NEXT:    csel r12, r4, r12, gt
+; CHECK-NEXT:    cmp r7, #1
+; CHECK-NEXT:    beq .LBB0_10
+; CHECK-NEXT:  @ %bb.8: @ %while.body.epil.1
+; CHECK-NEXT:    ldr r4, [r0, #8]
+; CHECK-NEXT:    cmp r12, r4
+; CHECK-NEXT:    csinc r8, r8, r1, le
+; CHECK-NEXT:    csel r12, r4, r12, gt
+; CHECK-NEXT:    cmp r7, #2
+; CHECK-NEXT:    beq .LBB0_10
+; CHECK-NEXT:  @ %bb.9: @ %while.body.epil.2
+; CHECK-NEXT:    ldr r0, [r0, #12]
+; CHECK-NEXT:    cmp r12, r0
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    addgt.w r8, r1, #2
+; CHECK-NEXT:    csel r12, r0, r12, gt
+; CHECK-NEXT:  .LBB0_10: @ %while.end
+; CHECK-NEXT:    str.w r12, [r2]
+; CHECK-NEXT:    str.w r8, [r3]
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+  %0 = load i32, i32* %pSrc, align 4 ; running minimum starts as pSrc[0]
+  %blkCnt.015 = add i32 %blockSize, -1 ; number of elements still to compare after the first
+  %cmp.not17 = icmp eq i32 %blkCnt.015, 0
+  br i1 %cmp.not17, label %while.end, label %while.body.preheader ; blockSize == 1: result is pSrc[0] with index 0
+
+while.body.preheader:                             ; preds = %entry
+  %1 = add i32 %blockSize, -2
+  %xtraiter = and i32 %blkCnt.015, 3 ; remainder handled by the epilogue: blkCnt mod 4
+  %2 = icmp ult i32 %1, 3 ; unsigned blkCnt - 1 < 3, i.e. fewer than 4 elements remain
+  br i1 %2, label %while.end.loopexit.unr-lcssa, label %while.body.preheader.new ; skip the unrolled loop when it would not run a full iteration
+
+while.body.preheader.new:                         ; preds = %while.body.preheader
+  %unroll_iter = and i32 %blkCnt.015, -4 ; remaining count rounded down to a multiple of 4
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.preheader.new
+  %pSrc.addr.021.pn = phi i32* [ %pSrc, %while.body.preheader.new ], [ %pSrc.addr.021.3, %while.body ]
+  %blkCnt.020 = phi i32 [ %blkCnt.015, %while.body.preheader.new ], [ %blkCnt.0.3, %while.body ]
+  %outIndex.019 = phi i32 [ 0, %while.body.preheader.new ], [ %spec.select14.3, %while.body ]
+  %out.018 = phi i32 [ %0, %while.body.preheader.new ], [ %spec.select.3, %while.body ]
+  %niter = phi i32 [ %unroll_iter, %while.body.preheader.new ], [ %niter.nsub.3, %while.body ]
+  %pSrc.addr.021 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 1
+  %3 = load i32, i32* %pSrc.addr.021, align 4
+  %cmp2 = icmp sgt i32 %out.018, %3 ; true when the loaded element is strictly smaller (signed) than the running minimum
+  %sub3 = sub i32 %blockSize, %blkCnt.020 ; index of the element just loaded
+  %spec.select = select i1 %cmp2, i32 %3, i32 %out.018 ; updated running minimum
+  %spec.select14 = select i1 %cmp2, i32 %sub3, i32 %outIndex.019 ; updated index of the running minimum
+  %blkCnt.0 = add i32 %blkCnt.020, -1
+  %pSrc.addr.021.1 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 2 ; second of four unrolled copies of the same compare/select step
+  %4 = load i32, i32* %pSrc.addr.021.1, align 4
+  %cmp2.1 = icmp sgt i32 %spec.select, %4
+  %sub3.1 = sub i32 %blockSize, %blkCnt.0
+  %spec.select.1 = select i1 %cmp2.1, i32 %4, i32 %spec.select
+  %spec.select14.1 = select i1 %cmp2.1, i32 %sub3.1, i32 %spec.select14
+  %blkCnt.0.1 = add i32 %blkCnt.020, -2
+  %pSrc.addr.021.2 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 3 ; third unrolled copy
+  %5 = load i32, i32* %pSrc.addr.021.2, align 4
+  %cmp2.2 = icmp sgt i32 %spec.select.1, %5
+  %sub3.2 = sub i32 %blockSize, %blkCnt.0.1
+  %spec.select.2 = select i1 %cmp2.2, i32 %5, i32 %spec.select.1
+  %spec.select14.2 = select i1 %cmp2.2, i32 %sub3.2, i32 %spec.select14.1
+  %blkCnt.0.2 = add i32 %blkCnt.020, -3
+  %pSrc.addr.021.3 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn, i32 4 ; fourth unrolled copy; this pointer is also the next iteration's base
+  %6 = load i32, i32* %pSrc.addr.021.3, align 4
+  %cmp2.3 = icmp sgt i32 %spec.select.2, %6
+  %sub3.3 = sub i32 %blockSize, %blkCnt.0.2
+  %spec.select.3 = select i1 %cmp2.3, i32 %6, i32 %spec.select.2
+  %spec.select14.3 = select i1 %cmp2.3, i32 %sub3.3, i32 %spec.select14.2
+  %blkCnt.0.3 = add i32 %blkCnt.020, -4
+  %niter.nsub.3 = add i32 %niter, -4 ; four elements consumed per trip
+  %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
+  br i1 %niter.ncmp.3, label %while.end.loopexit.unr-lcssa, label %while.body
+
+while.end.loopexit.unr-lcssa:                     ; preds = %while.body, %while.body.preheader
+  %spec.select.lcssa.ph = phi i32 [ undef, %while.body.preheader ], [ %spec.select.3, %while.body ] ; only reaches %while.end when %xtraiter is 0, which cannot happen on the preheader edge (blkCnt is 1..3 there)
+  %spec.select14.lcssa.ph = phi i32 [ undef, %while.body.preheader ], [ %spec.select14.3, %while.body ]
+  %pSrc.addr.021.pn.unr = phi i32* [ %pSrc, %while.body.preheader ], [ %pSrc.addr.021.3, %while.body ]
+  %blkCnt.020.unr = phi i32 [ %blkCnt.015, %while.body.preheader ], [ %blkCnt.0.3, %while.body ]
+  %outIndex.019.unr = phi i32 [ 0, %while.body.preheader ], [ %spec.select14.3, %while.body ]
+  %out.018.unr = phi i32 [ %0, %while.body.preheader ], [ %spec.select.3, %while.body ]
+  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
+  br i1 %lcmp.mod.not, label %while.end, label %while.body.epil ; no remainder elements: go straight to the stores
+
+while.body.epil:                                  ; preds = %while.end.loopexit.unr-lcssa
+  %pSrc.addr.021.epil = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 1 ; first remainder element
+  %7 = load i32, i32* %pSrc.addr.021.epil, align 4
+  %cmp2.epil = icmp sgt i32 %out.018.unr, %7
+  %sub3.epil = sub i32 %blockSize, %blkCnt.020.unr
+  %spec.select.epil = select i1 %cmp2.epil, i32 %7, i32 %out.018.unr
+  %spec.select14.epil = select i1 %cmp2.epil, i32 %sub3.epil, i32 %outIndex.019.unr
+  %epil.iter.cmp.not = icmp eq i32 %xtraiter, 1
+  br i1 %epil.iter.cmp.not, label %while.end, label %while.body.epil.1 ; exactly one remainder element: done
+
+while.end:                                        ; preds = %while.end.loopexit.unr-lcssa, %while.body.epil.2, %while.body.epil.1, %while.body.epil, %entry
+  %out.0.lcssa = phi i32 [ %0, %entry ], [ %spec.select.lcssa.ph, %while.end.loopexit.unr-lcssa ], [ %spec.select.epil, %while.body.epil ], [ %spec.select.epil.1, %while.body.epil.1 ], [ %spec.select.epil.2, %while.body.epil.2 ]
+  %outIndex.0.lcssa = phi i32 [ 0, %entry ], [ %spec.select14.lcssa.ph, %while.end.loopexit.unr-lcssa ], [ %spec.select14.epil, %while.body.epil ], [ %spec.select14.epil.1, %while.body.epil.1 ], [ %spec.select14.epil.2, %while.body.epil.2 ]
+  store i32 %out.0.lcssa, i32* %pResult, align 4 ; write the minimum value
+  store i32 %outIndex.0.lcssa, i32* %pIndex, align 4 ; write the index of the minimum
+  ret void
+
+while.body.epil.1:                                ; preds = %while.body.epil
+  %blkCnt.0.epil = add i32 %blkCnt.020.unr, -1
+  %pSrc.addr.021.epil.1 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 2 ; second remainder element
+  %8 = load i32, i32* %pSrc.addr.021.epil.1, align 4
+  %cmp2.epil.1 = icmp sgt i32 %spec.select.epil, %8
+  %sub3.epil.1 = sub i32 %blockSize, %blkCnt.0.epil
+  %spec.select.epil.1 = select i1 %cmp2.epil.1, i32 %8, i32 %spec.select.epil
+  %spec.select14.epil.1 = select i1 %cmp2.epil.1, i32 %sub3.epil.1, i32 %spec.select14.epil
+  %epil.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
+  br i1 %epil.iter.cmp.1.not, label %while.end, label %while.body.epil.2 ; exactly two remainder elements: done
+
+while.body.epil.2:                                ; preds = %while.body.epil.1
+  %blkCnt.0.epil.1 = add i32 %blkCnt.020.unr, -2
+  %pSrc.addr.021.epil.2 = getelementptr inbounds i32, i32* %pSrc.addr.021.pn.unr, i32 3 ; third and last remainder element
+  %9 = load i32, i32* %pSrc.addr.021.epil.2, align 4
+  %cmp2.epil.2 = icmp sgt i32 %spec.select.epil.1, %9
+  %sub3.epil.2 = sub i32 %blockSize, %blkCnt.0.epil.1
+  %spec.select.epil.2 = select i1 %cmp2.epil.2, i32 %9, i32 %spec.select.epil.1
+  %spec.select14.epil.2 = select i1 %cmp2.epil.2, i32 %sub3.epil.2, i32 %spec.select14.epil.1
+  br label %while.end
+}


        


More information about the llvm-commits mailing list