[llvm] 785080e - [ARM] Low overhead loop memcpy lowering test. NFC

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 3 03:45:03 PST 2020


Author: David Green
Date: 2020-11-03T11:44:50Z
New Revision: 785080e3fa941f77dc20d5d6d2677196445bec4d

URL: https://github.com/llvm/llvm-project/commit/785080e3fa941f77dc20d5d6d2677196445bec4d
DIFF: https://github.com/llvm/llvm-project/commit/785080e3fa941f77dc20d5d6d2677196445bec4d.diff

LOG: [ARM] Low overhead loop memcpy lowering test. NFC

Added: 
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
new file mode 100644
index 000000000000..1ab0b606b4ac
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
+define void @test_memcpy(i32* nocapture %x, i32* nocapture readonly %y, i32 %n, i32 %m) { ; %n loop iterations, each an llvm.memcpy of run-time length %m; expected output calls __aeabi_memcpy4 inside a subs/bne loop
+; CHECK-LABEL: test_memcpy:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB0_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT:    mov r8, r3
+; CHECK-NEXT:    mov lr, r2
+; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsls r7, r3, #2
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:  .LBB0_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    adds r0, r6, r4
+; CHECK-NEXT:    add.w r1, r9, r4
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r5, lr
+; CHECK-NEXT:    bl __aeabi_memcpy4
+; CHECK-NEXT:    mov lr, r5
+; CHECK-NEXT:    add r4, r7
+; CHECK-NEXT:    subs.w lr, lr, #1
+; CHECK-NEXT:    bne .LBB0_2
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_3: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+entry:
+  %cmp8 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %x.addr.010 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] ; current destination pointer
+  %y.addr.09 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] ; current source pointer
+  %0 = bitcast i32* %x.addr.010 to i8*
+  %1 = bitcast i32* %y.addr.09 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %m, i1 false) ; length operand is the run-time value %m; both pointers are marked align 4
+  %add.ptr = getelementptr inbounds i32, i32* %x.addr.010, i32 %m ; advance destination by %m i32 elements
+  %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.09, i32 %m ; advance source by %m i32 elements
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; leave the loop once %n iterations have run
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @test_memset(i32* nocapture %x, i32 %n, i32 %m) { ; %n loop iterations, each an llvm.memset of value 0 with run-time length %m; expected output calls __aeabi_memclr4 inside a subs/bne loop
+; CHECK-LABEL: test_memset:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    cmp r1, #1
+; CHECK-NEXT:    blt .LBB1_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT:    mov r4, r2
+; CHECK-NEXT:    mov lr, r1
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    lsls r6, r2, #2
+; CHECK-NEXT:  .LBB1_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    mov r7, lr
+; CHECK-NEXT:    bl __aeabi_memclr4
+; CHECK-NEXT:    mov lr, r7
+; CHECK-NEXT:    add r5, r6
+; CHECK-NEXT:    subs.w lr, lr, #1
+; CHECK-NEXT:    bne .LBB1_2
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:  .LBB1_3: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  %cmp5 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
+  br i1 %cmp5, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %x.addr.06 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] ; current destination pointer
+  %0 = bitcast i32* %x.addr.06 to i8*
+  tail call void @llvm.memset.p0i8.i32(i8* align 4 %0, i8 0, i32 %m, i1 false) ; fill value is the constant 0; length operand is the run-time value %m
+  %add.ptr = getelementptr inbounds i32, i32* %x.addr.06, i32 %m ; advance destination by %m i32 elements
+  %inc = add nuw nsw i32 %i.07, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; leave the loop once %n iterations have run
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @test_memmove(i32* nocapture %x, i32* nocapture readonly %y, i32 %n, i32 %m) { ; %n loop iterations, each an llvm.memmove of run-time length %m; expected output calls __aeabi_memmove4 inside a subs/bne loop
+; CHECK-LABEL: test_memmove:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    blt .LBB2_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT:    mov r8, r3
+; CHECK-NEXT:    mov lr, r2
+; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    mov r6, r0
+; CHECK-NEXT:    lsls r7, r3, #2
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:  .LBB2_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    adds r0, r6, r4
+; CHECK-NEXT:    add.w r1, r9, r4
+; CHECK-NEXT:    mov r2, r8
+; CHECK-NEXT:    mov r5, lr
+; CHECK-NEXT:    bl __aeabi_memmove4
+; CHECK-NEXT:    mov lr, r5
+; CHECK-NEXT:    add r4, r7
+; CHECK-NEXT:    subs.w lr, lr, #1
+; CHECK-NEXT:    bne .LBB2_2
+; CHECK-NEXT:    b .LBB2_3
+; CHECK-NEXT:  .LBB2_3: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+entry:
+  %cmp8 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %x.addr.010 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] ; current destination pointer
+  %y.addr.09 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] ; current source pointer
+  %0 = bitcast i32* %x.addr.010 to i8*
+  %1 = bitcast i32* %y.addr.09 to i8*
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %m, i1 false) ; length operand is the run-time value %m; both pointers are marked align 4
+  %add.ptr = getelementptr inbounds i32, i32* %x.addr.010, i32 %m ; advance destination by %m i32 elements
+  %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.09, i32 %m ; advance source by %m i32 elements
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; leave the loop once %n iterations have run
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+
+define void @test_memcpy16(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) { ; %n loop iterations, each an llvm.memcpy of constant length 16; expected output has no library call: inline ldm/ldr + stm/str inside a dls/le loop
+; CHECK-LABEL: test_memcpy16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    poplt {r4, pc}
+; CHECK-NEXT:  .LBB3_1: @ %for.body.preheader
+; CHECK-NEXT:    dls lr, r2
+; CHECK-NEXT:  .LBB3_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldm.w r1, {r2, r3, r12}
+; CHECK-NEXT:    ldr r4, [r1, #12]
+; CHECK-NEXT:    adds r1, #64
+; CHECK-NEXT:    stm.w r0, {r2, r3, r12}
+; CHECK-NEXT:    str r4, [r0, #12]
+; CHECK-NEXT:    adds r0, #64
+; CHECK-NEXT:    le lr, .LBB3_2
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
+entry:
+  %cmp6 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
+  br i1 %cmp6, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %x.addr.08 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] ; current destination pointer
+  %y.addr.07 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] ; current source pointer
+  %0 = bitcast i32* %x.addr.08 to i8*
+  %1 = bitcast i32* %y.addr.07 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 4 dereferenceable(16) %0, i8* nonnull align 4 dereferenceable(16) %1, i32 16, i1 false) ; length operand is the constant 16
+  %add.ptr = getelementptr inbounds i32, i32* %x.addr.08, i32 16 ; advance destination by 16 i32 elements (the expected output adds #64 to the pointer)
+  %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.07, i32 16 ; advance source by 16 i32 elements
+  %inc = add nuw nsw i32 %i.09, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; leave the loop once %n iterations have run
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @test_memset16(i32* nocapture %x, i32 %n) { ; %n loop iterations, each an llvm.memset of value 0 with constant length 16; expected output has no library call: two strd stores inside a dls/le loop
+; CHECK-LABEL: test_memset16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    cmp r1, #1
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    poplt {r7, pc}
+; CHECK-NEXT:  .LBB4_1: @ %for.body.preheader
+; CHECK-NEXT:    dls lr, r1
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:  .LBB4_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    strd r1, r1, [r0]
+; CHECK-NEXT:    strd r1, r1, [r0, #8]
+; CHECK-NEXT:    adds r0, #64
+; CHECK-NEXT:    le lr, .LBB4_2
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %cmp4 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
+  br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %x.addr.05 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] ; current destination pointer
+  %0 = bitcast i32* %x.addr.05 to i8*
+  tail call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(16) %0, i8 0, i32 16, i1 false) ; fill value is the constant 0; length operand is the constant 16
+  %add.ptr = getelementptr inbounds i32, i32* %x.addr.05, i32 16 ; advance destination by 16 i32 elements (the expected output adds #64 to the pointer)
+  %inc = add nuw nsw i32 %i.06, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; leave the loop once %n iterations have run
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @test_memmove16(i32* nocapture %x, i32* nocapture readonly %y, i32 %n) { ; %n loop iterations, each an llvm.memmove of constant length 16; expected output has no library call: inline ldm/ldr + stm/str inside a dls/le loop
+; CHECK-LABEL: test_memmove16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    cmp r2, #1
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    poplt {r4, pc}
+; CHECK-NEXT:  .LBB5_1: @ %for.body.preheader
+; CHECK-NEXT:    dls lr, r2
+; CHECK-NEXT:  .LBB5_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldm.w r1, {r2, r3, r12}
+; CHECK-NEXT:    ldr r4, [r1, #12]
+; CHECK-NEXT:    adds r1, #64
+; CHECK-NEXT:    stm.w r0, {r2, r3, r12}
+; CHECK-NEXT:    str r4, [r0, #12]
+; CHECK-NEXT:    adds r0, #64
+; CHECK-NEXT:    le lr, .LBB5_2
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, pc}
+entry:
+  %cmp6 = icmp sgt i32 %n, 0 ; the loop is entered only when %n > 0
+  br i1 %cmp6, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+  %x.addr.08 = phi i32* [ %add.ptr, %for.body ], [ %x, %entry ] ; current destination pointer
+  %y.addr.07 = phi i32* [ %add.ptr1, %for.body ], [ %y, %entry ] ; current source pointer
+  %0 = bitcast i32* %x.addr.08 to i8*
+  %1 = bitcast i32* %y.addr.07 to i8*
+  tail call void @llvm.memmove.p0i8.p0i8.i32(i8* nonnull align 4 dereferenceable(16) %0, i8* nonnull align 4 dereferenceable(16) %1, i32 16, i1 false) ; length operand is the constant 16
+  %add.ptr = getelementptr inbounds i32, i32* %x.addr.08, i32 16 ; advance destination by 16 i32 elements (the expected output adds #64 to the pointer)
+  %add.ptr1 = getelementptr inbounds i32, i32* %y.addr.07, i32 16 ; advance source by 16 i32 elements
+  %inc = add nuw nsw i32 %i.09, 1
+  %exitcond.not = icmp eq i32 %inc, %n ; leave the loop once %n iterations have run
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg) ; operands: destination, source, i32 length, i1 volatile flag (must be an immediate)
+declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg) ; operands: destination, i8 fill value, i32 length, i1 volatile flag (must be an immediate)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) ; operands: destination, source, i32 length, i1 volatile flag (must be an immediate)


        


More information about the llvm-commits mailing list