[llvm] e4b9867 - [ARM] Expand cannotInsertWDLSTPBetween to the last instruction

David Green via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 28 01:15:22 PDT 2020


Author: David Green
Date: 2020-09-28T09:14:40+01:00
New Revision: e4b9867cb69acc272157ef1baaecd9848ad18ecc

URL: https://github.com/llvm/llvm-project/commit/e4b9867cb69acc272157ef1baaecd9848ad18ecc
DIFF: https://github.com/llvm/llvm-project/commit/e4b9867cb69acc272157ef1baaecd9848ad18ecc.diff

LOG: [ARM] Expand cannotInsertWDLSTPBetween to the last instruction

9d9a11c7be037 added this check for predicatable instructions between the
D/WLSTP and the loop's start, but the scan stopped at the last instruction
in the block without inspecting it. Change the helper to take iterators
and pass InsertBB->end() so that the last instruction is checked too.

Differential Revision: https://reviews.llvm.org/D88354

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index ff50d94d328f..f01acd8adfbf 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -629,17 +629,15 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
   // width, the Loop Start instruction will immediately generate one or more
   // false lane mask which can, incorrectly, affect the proceeding MVE
   // instructions in the preheader.
-  auto cannotInsertWDLSTPBetween = [](MachineInstr *Begin,
-                                      MachineInstr *End) {
-    auto I = MachineBasicBlock::iterator(Begin);
-    auto E = MachineBasicBlock::iterator(End);
+  auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
+                                      MachineBasicBlock::iterator E) {
     for (; I != E; ++I)
       if (shouldInspect(*I))
         return true;
     return false;
   };
 
-  if (cannotInsertWDLSTPBetween(StartInsertPt, &InsertBB->back()))
+  if (cannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end()))
     return false;
 
   // Especially in the case of while loops, InsertBB may not be the

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
index e5d629c8730c..429a88884db9 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -46,6 +46,7 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
   ; CHECK:   tCMPi8 renamable $r1, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   renamable $r12 = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   tBcc %bb.2, 2 /* CC::hs */, killed $cpsr
   ; CHECK: bb.1:
   ; CHECK:   liveins: $r2
@@ -54,24 +55,31 @@ body:             |
   ; CHECK:   tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
   ; CHECK: bb.2:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   liveins: $r0, $r1, $r2
+  ; CHECK:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK:   renamable $r4, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
+  ; CHECK:   tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2IT 11, 8, implicit-def $itstate
+  ; CHECK:   $r12 = tMOVr renamable $r1, 11 /* CC::lt */, killed $cpsr, implicit killed renamable $r12, implicit killed $itstate
+  ; CHECK:   renamable $r3 = t2SUBrr renamable $r1, killed renamable $r12, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
   ; CHECK:   $r12 = tMOVr $r1, 14 /* CC::al */, $noreg
+  ; CHECK:   renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
   ; CHECK:   $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
   ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
   ; CHECK: bb.3:
   ; CHECK:   successors: %bb.3(0x7c000000), %bb.4(0x04000000)
-  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3
+  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4
   ; CHECK:   renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r3, 0, 0, $noreg
   ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
   ; CHECK:   renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14 /* CC::al */, $noreg
   ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.3
   ; CHECK: bb.4:
   ; CHECK:   successors: %bb.5(0x80000000)
-  ; CHECK:   liveins: $q0, $r0, $r1, $r2
+  ; CHECK:   liveins: $q0, $r0, $r1, $r2, $r4
   ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
   ; CHECK:   $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3
+  ; CHECK:   $lr = t2DLS killed $r4
   ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14 /* CC::al */, $noreg, implicit killed $q0
   ; CHECK:   $s2 = VMOVSR $r1, 14 /* CC::al */, $noreg
@@ -80,13 +88,16 @@ body:             |
   ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
   ; CHECK: bb.5:
   ; CHECK:   successors: %bb.5(0x7c000000), %bb.6(0x04000000)
-  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $s4
+  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $s4
+  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
   ; CHECK:   $r4 = VMOVRS $s4, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
-  ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 0, $noreg, undef renamable $q2
-  ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg
+  ; CHECK:   MVE_VPST 2, implicit $vpr
+  ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr
+  ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 1, renamable $vpr, undef renamable $q2
+  ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 1, killed renamable $vpr
+  ; CHECK:   renamable $r3, dead $cpsr = nsw tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 16, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.5
+  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.5
   ; CHECK: bb.6:
   ; CHECK:   liveins: $q0, $r1, $r2
   ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index 3cd24f8f5247..c176fcabdfb6 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -6,10 +6,19 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, lr}
 ; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    mov r3, r1
+; CHECK-NEXT:    cmp r1, #4
+; CHECK-NEXT:    it ge
+; CHECK-NEXT:    movge r3, #4
+; CHECK-NEXT:    mov.w r12, #1
+; CHECK-NEXT:    subs r3, r1, r3
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
+; CHECK-NEXT:    adds r3, #3
+; CHECK-NEXT:    add.w lr, r12, r3, lsr #2
 ; CHECK-NEXT:    mov r3, r1
 ; CHECK-NEXT:    mov r12, r0
 ; CHECK-NEXT:    dlstp.32 lr, r3
+; CHECK-NEXT:    mov r4, lr
 ; CHECK-NEXT:  .LBB0_1: @ %do.body.i
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q1, [r12], #16
@@ -18,7 +27,7 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
 ; CHECK-NEXT:  @ %bb.2: @ %arm_mean_f32_mve.exit
 ; CHECK-NEXT:    vmov s4, r1
 ; CHECK-NEXT:    mov r3, r1
-; CHECK-NEXT:    dlstp.32 lr, r3
+; CHECK-NEXT:    dls lr, r4
 ; CHECK-NEXT:    vadd.f32 s0, s3, s3
 ; CHECK-NEXT:    vcvt.f32.u32 s4, s4
 ; CHECK-NEXT:    vdiv.f32 s0, s0, s4
@@ -26,10 +35,13 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:  .LBB0_3: @ %do.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
-; CHECK-NEXT:    vsub.f32 q1, q1, r12
-; CHECK-NEXT:    vfma.f32 q0, q1, q1
-; CHECK-NEXT:    letp lr, .LBB0_3
+; CHECK-NEXT:    vctp.32 r3
+; CHECK-NEXT:    subs r3, #4
+; CHECK-NEXT:    vpsttt
+; CHECK-NEXT:    vldrwt.u32 q1, [r0], #16
+; CHECK-NEXT:    vsubt.f32 q1, q1, r12
+; CHECK-NEXT:    vfmat.f32 q0, q1, q1
+; CHECK-NEXT:    le lr, .LBB0_3
 ; CHECK-NEXT:  @ %bb.4: @ %do.end
 ; CHECK-NEXT:    subs r0, r1, #1
 ; CHECK-NEXT:    vadd.f32 s0, s3, s3


        


More information about the llvm-commits mailing list