[llvm] 6ec5f32 - [ARM][LowOverheadLoops] Iteration count liveness

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 1 02:14:15 PDT 2020


Author: Sam Parker
Date: 2020-10-01T10:11:10+01:00
New Revision: 6ec5f324973dfbe7b4a489593dde5073ff63ff64

URL: https://github.com/llvm/llvm-project/commit/6ec5f324973dfbe7b4a489593dde5073ff63ff64
DIFF: https://github.com/llvm/llvm-project/commit/6ec5f324973dfbe7b4a489593dde5073ff63ff64.diff

LOG: [ARM][LowOverheadLoops] Iteration count liveness

Before deciding to insert a [W|D]LSTP, check that defining LR with
the element count won't affect any other instructions that expect LR
to hold the iteration count.

Differential Revision: https://reviews.llvm.org/D88549

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index fe28470c99ec..cd9c38752ad2 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -611,6 +611,23 @@ bool LowOverheadLoop::ValidateTailPredicate() {
     return false;
   }
 
+  // Check that creating a [W|D]LSTP, which will define LR with an element
+  // count instead of iteration count, won't affect any other instructions
+  // than the LoopStart and LoopDec.
+  // TODO: We should try to insert the [W|D]LSTP after any of the other uses.
+  if (StartInsertPt == Start && Start->getOperand(0).getReg() == ARM::LR) {
+    if (auto *IterCount = RDA.getMIOperand(Start, 0)) {
+      SmallPtrSet<MachineInstr *, 2> Uses;
+      RDA.getGlobalUses(IterCount, ARM::LR, Uses);
+      for (auto *Use : Uses) {
+        if (Use != Start && Use != Dec) {
+          LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use);
+          return false;
+        }
+      }
+    }
+  }
+
   // For tail predication, we need to provide the number of elements, instead
   // of the iteration count, to the loop start instruction. The number of
   // elements is provided to the vctp instruction, so we need to check that

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
index c5713c8224b5..c2de31ddef1f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir
@@ -142,18 +142,22 @@ body:             |
   ; CHECK:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
-  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
+  ; CHECK:   t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
+  ; CHECK:   $lr = t2DLS killed renamable $lr
   ; CHECK:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2
+  ; CHECK:   liveins: $r0, $r1, $r2, $r12
   ; CHECK:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
+  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
+  ; CHECK:   renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   MVE_VPST 8, implicit $vpr
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
   ; CHECK:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
-  ; CHECK:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
+  ; CHECK:   MVE_VPST 8, implicit $vpr
+  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
+  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
   ; CHECK: bb.2.do.end:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:
@@ -242,19 +246,23 @@ body:             |
   ; CHECK:   renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
+  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $lr = t2DLS killed renamable $lr
   ; CHECK:   t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.iter.addr)
   ; CHECK:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $r0, $r1, $r2
+  ; CHECK:   liveins: $r0, $r1, $r2, $r12
   ; CHECK:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
+  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.02, align 4)
+  ; CHECK:   renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   MVE_VPST 8, implicit $vpr
+  ; CHECK:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.02, align 4)
   ; CHECK:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg :: (store 16 into %ir.pDst.addr.01, align 4)
-  ; CHECK:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
+  ; CHECK:   MVE_VPST 8, implicit $vpr
+  ; CHECK:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr :: (store 16 into %ir.pDst.addr.01, align 4)
+  ; CHECK:   dead $lr = t2LEUpdate killed renamable $lr, %bb.1
   ; CHECK: bb.2.do.end:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   bb.0.entry:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index b71c2dd7aaa0..94e3e26c819d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -160,17 +160,20 @@ body:             |
   ; CHECK:   renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
   ; CHECK:   renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg
-  ; CHECK:   dead renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
   ; CHECK:   $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3
+  ; CHECK:   $lr = t2DLS killed renamable $lr
   ; CHECK:   $r4 = tMOVr $lr, 14 /* CC::al */, $noreg
   ; CHECK: bb.1.do.body.i:
   ; CHECK:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12
-  ; CHECK:   renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
-  ; CHECK:   renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
-  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.1
+  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12
+  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
+  ; CHECK:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
+  ; CHECK:   MVE_VPST 4, implicit $vpr
+  ; CHECK:   renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
+  ; CHECK:   renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0
+  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.1
   ; CHECK: bb.2.arm_mean_f32_mve.exit:
   ; CHECK:   successors: %bb.3(0x80000000)
   ; CHECK:   liveins: $q0, $r0, $r1, $r2, $r4

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
index 0e182ece4a75..1404075dce90 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
@@ -17,13 +17,16 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
 ; CHECK-NEXT:    add.w lr, r12, r3, lsr #2
 ; CHECK-NEXT:    mov r3, r1
 ; CHECK-NEXT:    mov r12, r0
-; CHECK-NEXT:    dlstp.32 lr, r3
+; CHECK-NEXT:    dls lr, lr
 ; CHECK-NEXT:    mov r4, lr
 ; CHECK-NEXT:  .LBB0_1: @ %do.body.i
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vldrw.u32 q1, [r12], #16
-; CHECK-NEXT:    vadd.f32 q0, q0, q1
-; CHECK-NEXT:    letp lr, .LBB0_1
+; CHECK-NEXT:    vctp.32 r3
+; CHECK-NEXT:    subs r3, #4
+; CHECK-NEXT:    vpstt
+; CHECK-NEXT:    vldrwt.u32 q1, [r12], #16
+; CHECK-NEXT:    vaddt.f32 q0, q0, q1
+; CHECK-NEXT:    le lr, .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %arm_mean_f32_mve.exit
 ; CHECK-NEXT:    vmov s4, r1
 ; CHECK-NEXT:    vadd.f32 s0, s3, s3


        


More information about the llvm-commits mailing list