[llvm] 53c42f7 - [ARM] Ensure WLS preheader blocks have branches during memcpy lowering

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon May 24 03:26:58 PDT 2021


Author: David Green
Date: 2021-05-24T11:26:45+01:00
New Revision: 53c42f7700e824d6ec394614653abd8b33d5da34

URL: https://github.com/llvm/llvm-project/commit/53c42f7700e824d6ec394614653abd8b33d5da34
DIFF: https://github.com/llvm/llvm-project/commit/53c42f7700e824d6ec394614653abd8b33d5da34.diff

LOG: [ARM] Ensure WLS preheader blocks have branches during memcpy lowering

This makes sure that the blocks created for lowering memcpy to loops end
up with explicit branches, even if they fall through to the successor.
Otherwise IfCvt gets confused by the unanalyzable branches and creates
invalid block layouts.

In almost all cases the extra branches are removed again when the tail
predicated loop is finalized.

Added: 
    llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll

Modified: 
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 0a37ec388318..38c6c32a1609 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -11133,6 +11133,10 @@ static Register genTPEntry(MachineBasicBlock *TpEntry,
       .addUse(TotalIterationsReg)
       .addMBB(TpExit);
 
+  BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
+      .addMBB(TpLoopBody)
+      .add(predOps(ARMCC::AL));
+
   return TotalIterationsReg;
 }
 

diff  --git a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
new file mode 100644
index 000000000000..d0929ab3b20b
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
@@ -0,0 +1,374 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -arm-memtransfer-tploop=force-enabled %s -o - | FileCheck %s
+
+; In this test, the successors of various blocks were becoming invalid after
+; ifcvt as the blocks did not properly fall through to the successor after a
+; WhileLoopStart
+
+@arr_183 = external dso_local local_unnamed_addr global [20 x [23 x [19 x i8]]], align 1
+define i32 @a(i8 zeroext %b, [3 x i8]* nocapture readonly %c, [3 x i32]* nocapture readonly %d) {
+; CHECK-LABEL: a:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    cmp r0, #1
+; CHECK-NEXT:    bls.w .LBB0_11
+; CHECK-NEXT:  @ %bb.1: @ %for.body.us.preheader
+; CHECK-NEXT:    movw r5, :lower16:arr_183
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    movt r5, :upper16:arr_183
+; CHECK-NEXT:    mov.w r12, #19
+; CHECK-NEXT:    vmov.i32 q0, #0x0
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vmov.i32 q2, #0x0
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_2: @ %land.end.us.3
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movs r3, #1
+; CHECK-NEXT:  .LBB0_3: @ %for.body.us
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_6 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_8 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_10 Depth 2
+; CHECK-NEXT:    ldr.w r0, [r2, r3, lsl #2]
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r0, [r1, r3]
+; CHECK-NEXT:    moveq r0, #0
+; CHECK-NEXT:    mla r3, r3, r12, r5
+; CHECK-NEXT:    add r3, r0
+; CHECK-NEXT:    rsb.w r0, r0, #108
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_5
+; CHECK-NEXT:  .LBB0_4: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q0, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_4
+; CHECK-NEXT:  .LBB0_5: @ %land.end.us
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    ldr r0, [r2, #4]
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r0, [r1, #1]
+; CHECK-NEXT:    moveq r0, #0
+; CHECK-NEXT:    adds r3, r5, r0
+; CHECK-NEXT:    rsb.w r0, r0, #108
+; CHECK-NEXT:    adds r3, #19
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_7
+; CHECK-NEXT:  .LBB0_6: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_6
+; CHECK-NEXT:  .LBB0_7: @ %land.end.us.1
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    ldr r0, [r2, #4]
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r0, [r1, #1]
+; CHECK-NEXT:    moveq r0, #0
+; CHECK-NEXT:    adds r3, r5, r0
+; CHECK-NEXT:    rsb.w r0, r0, #108
+; CHECK-NEXT:    adds r3, #19
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_9
+; CHECK-NEXT:  .LBB0_8: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q2, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_8
+; CHECK-NEXT:  .LBB0_9: @ %land.end.us.2
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    ldr r0, [r2, #4]
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r0, [r1, #1]
+; CHECK-NEXT:    moveq r0, #0
+; CHECK-NEXT:    adds r3, r5, r0
+; CHECK-NEXT:    rsb.w r0, r0, #108
+; CHECK-NEXT:    add.w r4, r0, #15
+; CHECK-NEXT:    adds r3, #19
+; CHECK-NEXT:    lsrs r4, r4, #4
+; CHECK-NEXT:    subs.w lr, r4, #0
+; CHECK-NEXT:    beq .LBB0_2
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_10: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vctp.8 r0
+; CHECK-NEXT:    subs r0, #16
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vstrbt.8 q3, [r3], #16
+; CHECK-NEXT:    subs.w lr, lr, #1
+; CHECK-NEXT:    bne .LBB0_10
+; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:  .LBB0_11:
+; CHECK-NEXT:    movw r12, :lower16:arr_183
+; CHECK-NEXT:    vmov.i32 q0, #0x0
+; CHECK-NEXT:    movt r12, :upper16:arr_183
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vmov.i32 q2, #0x0
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    b .LBB0_13
+; CHECK-NEXT:  .LBB0_12: @ %for.body.lr.ph.3
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    ldr r3, [r2, #4]
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r3, [r1, #1]
+; CHECK-NEXT:    moveq r3, #0
+; CHECK-NEXT:    add.w r5, r12, r3
+; CHECK-NEXT:    rsb.w r3, r3, #108
+; CHECK-NEXT:    add.w r4, r5, #19
+; CHECK-NEXT:    add.w r5, r3, #15
+; CHECK-NEXT:    lsrs r5, r5, #4
+; CHECK-NEXT:    subs.w lr, r5, #0
+; CHECK-NEXT:    beq .LBB0_13
+; CHECK-NEXT:    b .LBB0_23
+; CHECK-NEXT:  .LBB0_13: @ %for.cond
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB0_15 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_18 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_21 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_23 Depth 2
+; CHECK-NEXT:    cmp r0, #2
+; CHECK-NEXT:    blo .LBB0_16
+; CHECK-NEXT:  @ %bb.14: @ %for.body.lr.ph
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    ldr r3, [r2, #4]
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r3, [r1, #1]
+; CHECK-NEXT:    moveq r3, #0
+; CHECK-NEXT:    add.w r5, r12, r3
+; CHECK-NEXT:    rsb.w r3, r3, #108
+; CHECK-NEXT:    add.w r4, r5, #19
+; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_16
+; CHECK-NEXT:  .LBB0_15: @ Parent Loop BB0_13 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q0, [r4], #16
+; CHECK-NEXT:    letp lr, .LBB0_15
+; CHECK-NEXT:  .LBB0_16: @ %for.cond.backedge
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    cmp r0, #2
+; CHECK-NEXT:    blo .LBB0_19
+; CHECK-NEXT:  @ %bb.17: @ %for.body.lr.ph.1
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    ldr r3, [r2, #4]
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r3, [r1, #1]
+; CHECK-NEXT:    moveq r3, #0
+; CHECK-NEXT:    add.w r5, r12, r3
+; CHECK-NEXT:    rsb.w r3, r3, #108
+; CHECK-NEXT:    add.w r4, r5, #19
+; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_19
+; CHECK-NEXT:  .LBB0_18: @ Parent Loop BB0_13 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q1, [r4], #16
+; CHECK-NEXT:    letp lr, .LBB0_18
+; CHECK-NEXT:  .LBB0_19: @ %for.cond.backedge.1
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    cmp r0, #2
+; CHECK-NEXT:    blo .LBB0_22
+; CHECK-NEXT:  @ %bb.20: @ %for.body.lr.ph.2
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    ldr r3, [r2, #4]
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    ite ne
+; CHECK-NEXT:    ldrbne r3, [r1, #1]
+; CHECK-NEXT:    moveq r3, #0
+; CHECK-NEXT:    add.w r5, r12, r3
+; CHECK-NEXT:    rsb.w r3, r3, #108
+; CHECK-NEXT:    add.w r4, r5, #19
+; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_22
+; CHECK-NEXT:  .LBB0_21: @ Parent Loop BB0_13 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q2, [r4], #16
+; CHECK-NEXT:    letp lr, .LBB0_21
+; CHECK-NEXT:  .LBB0_22: @ %for.cond.backedge.2
+; CHECK-NEXT:    @ in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    cmp r0, #2
+; CHECK-NEXT:    blo .LBB0_13
+; CHECK-NEXT:    b .LBB0_12
+; CHECK-NEXT:  .LBB0_23: @ Parent Loop BB0_13 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vctp.8 r3
+; CHECK-NEXT:    subs r3, #16
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vstrbt.8 q3, [r4], #16
+; CHECK-NEXT:    subs.w lr, lr, #1
+; CHECK-NEXT:    bne .LBB0_23
+; CHECK-NEXT:    b .LBB0_13
+entry:
+  %cmp = icmp ugt i8 %b, 1
+  br i1 %cmp, label %for.body.us.preheader, label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %entry
+  %cmp43 = icmp ugt i8 %b, 1
+  %arrayidx6 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  %cmp43.1 = icmp ugt i8 %b, 1
+  %arrayidx6.1 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12.1 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  %cmp43.2 = icmp ugt i8 %b, 1
+  %arrayidx6.2 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12.2 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  %cmp43.3 = icmp ugt i8 %b, 1
+  %arrayidx6.3 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12.3 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  br label %for.cond
+
+for.body.us.preheader:                            ; preds = %entry
+  %arrayidx6.us.1 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12.us.1 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  %arrayidx6.us.2 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12.us.2 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  %arrayidx6.us.3 = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 1
+  %arrayidx12.us.3 = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 1
+  br label %for.body.us
+
+for.cond:                                         ; preds = %for.cond.backedge.3, %for.cond.preheader
+  br i1 %cmp43, label %for.body.lr.ph, label %for.cond.backedge
+
+for.body.lr.ph:                                   ; preds = %for.cond
+  %0 = load i32, i32* %arrayidx6, align 4
+  %tobool7.not = icmp eq i32 %0, 0
+  br i1 %tobool7.not, label %land.end, label %land.rhs
+
+for.body.us:                                      ; preds = %land.end.us.3, %for.body.us.preheader
+  %conv44.us = phi i32 [ 0, %for.body.us.preheader ], [ 1, %land.end.us.3 ]
+  %arrayidx6.us = getelementptr inbounds [3 x i32], [3 x i32]* %d, i32 0, i32 %conv44.us
+  %1 = load i32, i32* %arrayidx6.us, align 4
+  %tobool7.not.us = icmp eq i32 %1, 0
+  br i1 %tobool7.not.us, label %land.end.us, label %land.rhs.us
+
+land.rhs.us:                                      ; preds = %for.body.us
+  %arrayidx12.us = getelementptr inbounds [3 x i8], [3 x i8]* %c, i32 0, i32 %conv44.us
+  %2 = load i8, i8* %arrayidx12.us, align 1
+  %tobool13.us = zext i8 %2 to i32
+  br label %land.end.us
+
+land.end.us:                                      ; preds = %land.rhs.us, %for.body.us
+  %3 = phi i32 [ 0, %for.body.us ], [ %tobool13.us, %land.rhs.us ]
+  %scevgep45 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 %conv44.us, i32 %3
+  %4 = sub nuw nsw i32 108, %3
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45, i8 0, i32 %4, i1 false)
+  %5 = load i32, i32* %arrayidx6.us.1, align 4
+  %tobool7.not.us.1 = icmp eq i32 %5, 0
+  br i1 %tobool7.not.us.1, label %land.end.us.1, label %land.rhs.us.1
+
+land.rhs:                                         ; preds = %for.body.lr.ph
+  %6 = load i8, i8* %arrayidx12, align 1
+  %tobool13 = zext i8 %6 to i32
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %for.body.lr.ph
+  %7 = phi i32 [ 0, %for.body.lr.ph ], [ %tobool13, %land.rhs ]
+  %scevgep = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %7
+  %8 = sub nuw nsw i32 108, %7
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep, i8 0, i32 %8, i1 false)
+  br label %for.cond.backedge
+
+for.cond.backedge:                                ; preds = %land.end, %for.cond
+  br i1 %cmp43.1, label %for.body.lr.ph.1, label %for.cond.backedge.1
+
+for.body.lr.ph.1:                                 ; preds = %for.cond.backedge
+  %9 = load i32, i32* %arrayidx6.1, align 4
+  %tobool7.not.1 = icmp eq i32 %9, 0
+  br i1 %tobool7.not.1, label %land.end.1, label %land.rhs.1
+
+land.rhs.1:                                       ; preds = %for.body.lr.ph.1
+  %10 = load i8, i8* %arrayidx12.1, align 1
+  %tobool13.1 = zext i8 %10 to i32
+  br label %land.end.1
+
+land.end.1:                                       ; preds = %land.rhs.1, %for.body.lr.ph.1
+  %11 = phi i32 [ 0, %for.body.lr.ph.1 ], [ %tobool13.1, %land.rhs.1 ]
+  %scevgep.1 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %11
+  %12 = sub nuw nsw i32 108, %11
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.1, i8 0, i32 %12, i1 false)
+  br label %for.cond.backedge.1
+
+for.cond.backedge.1:                              ; preds = %land.end.1, %for.cond.backedge
+  br i1 %cmp43.2, label %for.body.lr.ph.2, label %for.cond.backedge.2
+
+for.body.lr.ph.2:                                 ; preds = %for.cond.backedge.1
+  %13 = load i32, i32* %arrayidx6.2, align 4
+  %tobool7.not.2 = icmp eq i32 %13, 0
+  br i1 %tobool7.not.2, label %land.end.2, label %land.rhs.2
+
+land.rhs.2:                                       ; preds = %for.body.lr.ph.2
+  %14 = load i8, i8* %arrayidx12.2, align 1
+  %tobool13.2 = zext i8 %14 to i32
+  br label %land.end.2
+
+land.end.2:                                       ; preds = %land.rhs.2, %for.body.lr.ph.2
+  %15 = phi i32 [ 0, %for.body.lr.ph.2 ], [ %tobool13.2, %land.rhs.2 ]
+  %scevgep.2 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %15
+  %16 = sub nuw nsw i32 108, %15
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.2, i8 0, i32 %16, i1 false)
+  br label %for.cond.backedge.2
+
+for.cond.backedge.2:                              ; preds = %land.end.2, %for.cond.backedge.1
+  br i1 %cmp43.3, label %for.body.lr.ph.3, label %for.cond.backedge.3
+
+for.body.lr.ph.3:                                 ; preds = %for.cond.backedge.2
+  %17 = load i32, i32* %arrayidx6.3, align 4
+  %tobool7.not.3 = icmp eq i32 %17, 0
+  br i1 %tobool7.not.3, label %land.end.3, label %land.rhs.3
+
+land.rhs.3:                                       ; preds = %for.body.lr.ph.3
+  %18 = load i8, i8* %arrayidx12.3, align 1
+  %tobool13.3 = zext i8 %18 to i32
+  br label %land.end.3
+
+land.end.3:                                       ; preds = %land.rhs.3, %for.body.lr.ph.3
+  %19 = phi i32 [ 0, %for.body.lr.ph.3 ], [ %tobool13.3, %land.rhs.3 ]
+  %scevgep.3 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %19
+  %20 = sub nuw nsw i32 108, %19
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep.3, i8 0, i32 %20, i1 false)
+  br label %for.cond.backedge.3
+
+for.cond.backedge.3:                              ; preds = %land.end.3, %for.cond.backedge.2
+  br label %for.cond
+
+land.rhs.us.1:                                    ; preds = %land.end.us
+  %21 = load i8, i8* %arrayidx12.us.1, align 1
+  %tobool13.us.1 = zext i8 %21 to i32
+  br label %land.end.us.1
+
+land.end.us.1:                                    ; preds = %land.rhs.us.1, %land.end.us
+  %22 = phi i32 [ 0, %land.end.us ], [ %tobool13.us.1, %land.rhs.us.1 ]
+  %scevgep45.1 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %22
+  %23 = sub nuw nsw i32 108, %22
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.1, i8 0, i32 %23, i1 false)
+  %24 = load i32, i32* %arrayidx6.us.2, align 4
+  %tobool7.not.us.2 = icmp eq i32 %24, 0
+  br i1 %tobool7.not.us.2, label %land.end.us.2, label %land.rhs.us.2
+
+land.rhs.us.2:                                    ; preds = %land.end.us.1
+  %25 = load i8, i8* %arrayidx12.us.2, align 1
+  %tobool13.us.2 = zext i8 %25 to i32
+  br label %land.end.us.2
+
+land.end.us.2:                                    ; preds = %land.rhs.us.2, %land.end.us.1
+  %26 = phi i32 [ 0, %land.end.us.1 ], [ %tobool13.us.2, %land.rhs.us.2 ]
+  %scevgep45.2 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %26
+  %27 = sub nuw nsw i32 108, %26
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.2, i8 0, i32 %27, i1 false)
+  %28 = load i32, i32* %arrayidx6.us.3, align 4
+  %tobool7.not.us.3 = icmp eq i32 %28, 0
+  br i1 %tobool7.not.us.3, label %land.end.us.3, label %land.rhs.us.3
+
+land.rhs.us.3:                                    ; preds = %land.end.us.2
+  %29 = load i8, i8* %arrayidx12.us.3, align 1
+  %tobool13.us.3 = zext i8 %29 to i32
+  br label %land.end.us.3
+
+land.end.us.3:                                    ; preds = %land.rhs.us.3, %land.end.us.2
+  %30 = phi i32 [ 0, %land.end.us.2 ], [ %tobool13.us.3, %land.rhs.us.3 ]
+  %scevgep45.3 = getelementptr [20 x [23 x [19 x i8]]], [20 x [23 x [19 x i8]]]* @arr_183, i32 0, i32 0, i32 1, i32 %30
+  %31 = sub nuw nsw i32 108, %30
+  call void @llvm.memset.p0i8.i32(i8* align 1 %scevgep45.3, i8 0, i32 %31, i1 false)
+  br label %for.body.us
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
index 493f2e683de5..c4ba54ea6805 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -240,6 +240,7 @@ define void @test11(i8* nocapture %x, i8* nocapture %y, i32 %n) {
 ; CHECK-NEXT:    mov r3, r2
 ; CHECK-NEXT:    subs.w lr, lr, #0
 ; CHECK-NEXT:    beq .LBB10_3
+; CHECK-NEXT:    b .LBB10_2
 ; CHECK-NEXT:  .LBB10_2: @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vctp.8 r3
 ; CHECK-NEXT:    subs r3, #16


        


More information about the llvm-commits mailing list