[llvm] e7a6df6 - [ARM] Fix the operand used for WLS in ARMLowOverheadLoops

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri May 21 01:29:45 PDT 2021


Author: David Green
Date: 2021-05-21T09:29:30+01:00
New Revision: e7a6df68a6f5cd1122582b89f8cb3d4ebfa49c71

URL: https://github.com/llvm/llvm-project/commit/e7a6df68a6f5cd1122582b89f8cb3d4ebfa49c71
DIFF: https://github.com/llvm/llvm-project/commit/e7a6df68a6f5cd1122582b89f8cb3d4ebfa49c71.diff

LOG: [ARM] Fix the operand used for WLS in ARMLowOverheadLoops

The Loop start instruction handled by the ARMLowOverheadLoops are:
$lr = t2DoLoopStart $r0
$lr = t2DoLoopStartTP $r1, $r0
$lr = t2WhileLoopStartLR $r0, %bb, implicit-def dead $cpsr
All three of these will have LR as the 0 argument, the trip count as the
1 argument.

This patch updated a few places in ARMLowOverheadLoops where the 0th arg
was being used for t2WhileLoopStartLR instructions as the trip count.
One place was entirely removed as it does not seem valid any more, the
case the code is trying to protect against should not be able to occur
with our correct-by-construction low overhead loops.

Differential Revision: https://reviews.llvm.org/D102620

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
    llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 9637c9df900f2..c0fbc3aa1c50b 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -630,25 +630,6 @@ bool LowOverheadLoop::ValidateTailPredicate() {
     return false;
   }
 
-  // Check that creating a [W|D]LSTP, which will define LR with an element
-  // count instead of iteration count, won't affect any other instructions
-  // than the LoopStart and LoopDec.
-  // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
-  Register StartReg = isDo(Start) ? Start->getOperand(1).getReg()
-                                  : Start->getOperand(0).getReg();
-  if (StartInsertPt == Start && StartReg == ARM::LR) {
-    if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) {
-      SmallPtrSet<MachineInstr *, 2> Uses;
-      RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses);
-      for (auto *Use : Uses) {
-        if (Use != Start && Use != Dec) {
-          LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
-          return false;
-        }
-      }
-    }
-  }
-
   // For tail predication, we need to provide the number of elements, instead
   // of the iteration count, to the loop start instruction. The number of
   // elements is provided to the vctp instruction, so we need to check that
@@ -1410,8 +1391,7 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
 
   LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");
 
-  MachineInstr *Def =
-      RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0);
+  MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, 1);
   if (!Def) {
     LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");
     return;

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
index 01a9f34de430c..deec9a66a1afa 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
@@ -142,21 +142,18 @@ body:             |
   ; CHECK:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
   ; CHECK:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK:   liveins: $r0, $r1, $r2
   ; CHECK:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
   ; CHECK:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
+  ; CHECK:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.do.end:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -246,21 +243,18 @@ body:             |
   ; CHECK:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
   ; CHECK:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK:   liveins: $r0, $r1, $r2
   ; CHECK:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
   ; CHECK:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
-  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
+  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
+  ; CHECK:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
   ; CHECK: bb.2.do.end:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
index 8845cf8d7f454..78d4fcbc80919 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir
@@ -146,8 +146,6 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -16
   ; CHECK:   renamable $r4 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.5)
   ; CHECK:   renamable $r12 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r5, dead $cpsr = tADDi3 renamable $r4, 3, 14 /* CC::al */, $noreg
-  ; CHECK:   dead renamable $r5, dead $cpsr = tLSRri killed renamable $r5, 2, 14 /* CC::al */, $noreg
   ; CHECK:   $lr = MVE_WLSTP_32 killed renamable $r4, %bb.3
   ; CHECK: bb.1.for.body.lr.ph:
   ; CHECK:   successors: %bb.2(0x80000000)

diff  --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
index d5b4d9fabf0ff..982978005bbec 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -325,20 +325,13 @@ define void @twoloops(i32* %X, i32 %n, i32 %m) {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r7, lr}
 ; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    add.w r1, r2, #15
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
-; CHECK-NEXT:    bic r1, r1, #16
 ; CHECK-NEXT:    mov r3, r2
-; CHECK-NEXT:    lsr.w lr, r1, #4
 ; CHECK-NEXT:    mov r1, r0
-; CHECK-NEXT:    mov r12, lr
-; CHECK-NEXT:    wls lr, lr, .LBB13_2
+; CHECK-NEXT:    wlstp.8 lr, r3, .LBB13_2
 ; CHECK-NEXT:  .LBB13_1: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vctp.8 r3
-; CHECK-NEXT:    subs r3, #16
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vstrbt.8 q0, [r1], #16
-; CHECK-NEXT:    le lr, .LBB13_1
+; CHECK-NEXT:    vstrb.8 q0, [r1], #16
+; CHECK-NEXT:    letp lr, .LBB13_1
 ; CHECK-NEXT:  .LBB13_2: @ %entry
 ; CHECK-NEXT:    wlstp.8 lr, r2, .LBB13_4
 ; CHECK-NEXT:  .LBB13_3: @ =>This Inner Loop Header: Depth=1


        


More information about the llvm-commits mailing list