[llvm] a399d18 - [ARM] Find VPT implicitly predicated by VCTP

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 25 01:02:55 PDT 2020


Author: Sam Parker
Date: 2020-09-25T08:50:53+01:00
New Revision: a399d1880bc6e2a13cad02a2a3cb024c27d32ac2

URL: https://github.com/llvm/llvm-project/commit/a399d1880bc6e2a13cad02a2a3cb024c27d32ac2
DIFF: https://github.com/llvm/llvm-project/commit/a399d1880bc6e2a13cad02a2a3cb024c27d32ac2.diff

LOG: [ARM] Find VPT implicitly predicated by VCTP

On failing to find a VCTP in the list of instructions that explicitly
predicate the entry of a VPT block, inspect whether the block is
controlled via VPT which is implicitly predicated due to it's
predicated operand(s).

Differential Revision: https://reviews.llvm.org/D87819

Added: 
    

Modified: 
    llvm/lib/CodeGen/ReachingDefAnalysis.cpp
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 86c2f63fd3aa..f553bad31b94 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -433,7 +433,6 @@ MachineInstr *ReachingDefAnalysis::getUniqueReachingMIDef(MachineInstr *MI,
   if (LocalDef && InstIds.lookup(LocalDef) < InstIds.lookup(MI))
     return LocalDef;
 
-  SmallPtrSet<MachineBasicBlock*, 4> VisitedBBs;
   SmallPtrSet<MachineInstr*, 2> Incoming;
   MachineBasicBlock *Parent = MI->getParent();
   for (auto *Pred : Parent->predecessors())

diff  --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 5fab487e9174..ff50d94d328f 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -256,12 +256,54 @@ namespace {
       return isPredicatedOnVCTP(Insts.front(), Exclusive);
     }
 
-    static bool isValid() {
+    // If this block begins with a VPT, we can check whether it's using
+    // at least one predicated input(s), as well as possible loop invariant
+    // which would result in it being implicitly predicated.
+    static bool hasImplicitlyValidVPT(VPTState &Block,
+                                      ReachingDefAnalysis &RDA) {
+      SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
+      MachineInstr *VPT = Insts.front();
+      assert(isVPTOpcode(VPT->getOpcode()) &&
+             "Expected VPT block to begin with VPT/VPST");
+
+      if (VPT->getOpcode() == ARM::MVE_VPST)
+        return false;
+
+      auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
+        MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
+        return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
+      };
+
+      auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {
+        MachineOperand &MO = MI->getOperand(Idx);
+        if (!MO.isReg() || !MO.getReg())
+          return true;
+
+        SmallPtrSet<MachineInstr *, 2> Defs;
+        RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs);
+        if (Defs.empty())
+          return true;
+
+        for (auto *Def : Defs)
+          if (Def->getParent() == VPT->getParent())
+            return false;
+        return true;
+      };
+
+      // Check that at least one of the operands is directly predicated on a
+      // vctp and allow an invariant value too.
+      return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
+             (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
+             (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
+    }
+
+    static bool isValid(ReachingDefAnalysis &RDA) {
       // All predication within the loop should be based on vctp. If the block
       // isn't predicated on entry, check whether the vctp is within the block
       // and that all other instructions are then predicated on it.
       for (auto &Block : Blocks) {
-        if (isEntryPredicatedOnVCTP(Block))
+        if (isEntryPredicatedOnVCTP(Block, false) ||
+            hasImplicitlyValidVPT(Block, RDA))
           continue;
 
         SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();
@@ -517,7 +559,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
     return false;
   }
 
-  if (!VPTState::isValid())
+  if (!VPTState::isValid(RDA))
     return false;
 
   if (!ValidateLiveOuts()) {

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll
index 6ce2b9f5f1c0..f334a5950acb 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll
@@ -7,23 +7,13 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    ldrd r5, r12, [sp, #80]
-; CHECK-NEXT:    cmp.w r12, #4
-; CHECK-NEXT:    mov r4, r12
 ; CHECK-NEXT:    vmvn.i32 q0, #0x80000000
-; CHECK-NEXT:    it ge
-; CHECK-NEXT:    movge r4, #4
 ; CHECK-NEXT:    vmov.i32 q1, #0x3f
-; CHECK-NEXT:    sub.w r4, r12, r4
 ; CHECK-NEXT:    vmov.i32 q2, #0x1
-; CHECK-NEXT:    add.w lr, r4, #3
-; CHECK-NEXT:    movs r4, #1
-; CHECK-NEXT:    add.w lr, r4, lr, lsr #2
-; CHECK-NEXT:    dls lr, lr
+; CHECK-NEXT:    dlstp.32 lr, r12
 ; CHECK-NEXT:  .LBB0_1: @ %bb6
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vctp.32 r12
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vldrwt.u32 q4, [r1], #16
+; CHECK-NEXT:    vldrw.u32 q4, [r1], #16
 ; CHECK-NEXT:    vabs.s32 q5, q4
 ; CHECK-NEXT:    vcls.s32 q3, q5
 ; CHECK-NEXT:    vshl.u32 q5, q5, q3
@@ -41,15 +31,11 @@ define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg
 ; CHECK-NEXT:    vqshl.s32 q5, q5, #1
 ; CHECK-NEXT:    vpt.s32 lt, q4, zr
 ; CHECK-NEXT:    vnegt.s32 q5, q5
-; CHECK-NEXT:    vctp.32 r12
-; CHECK-NEXT:    sub.w r12, r12, #4
-; CHECK-NEXT:    vpst
-; CHECK-NEXT:    vldrwt.u32 q4, [r0], #16
+; CHECK-NEXT:    vldrw.u32 q4, [r0], #16
 ; CHECK-NEXT:    vqrdmulh.s32 q4, q4, q5
-; CHECK-NEXT:    vpstt
-; CHECK-NEXT:    vstrwt.32 q4, [r2], #16
-; CHECK-NEXT:    vstrwt.32 q3, [r3], #16
-; CHECK-NEXT:    le lr, .LBB0_1
+; CHECK-NEXT:    vstrw.32 q4, [r2], #16
+; CHECK-NEXT:    vstrw.32 q3, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %bb44
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    pop {r4, r5, r7, pc}

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir
index c455527b36f5..60a578d81594 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vpt-blocks.mir
@@ -593,26 +593,17 @@ body:             |
   ; CHECK: bb.1.vector.ph:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   liveins: $r0, $r1, $r2
-  ; CHECK:   renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
-  ; CHECK:   renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   renamable $r3, dead $cpsr = nsw tRSB renamable $r2, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2DLS killed renamable $lr
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r1
   ; CHECK: bb.2.vector.body:
   ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
-  ; CHECK:   MVE_VPST 8, implicit $vpr
-  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, killed renamable $vpr
-  ; CHECK:   MVE_VPTv4s32r 14, renamable $q1, renamable $r2, 10, implicit-def $vpr
+  ; CHECK:   liveins: $lr, $q0, $r0, $r2, $r3
+  ; CHECK:   renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, killed $noreg
+  ; CHECK:   MVE_VPTv4s32r 12, renamable $q1, renamable $r2, 10, implicit-def $vpr
   ; CHECK:   renamable $vpr = MVE_VCMPs32r killed renamable $q1, renamable $r3, 13, 1, killed renamable $vpr
-  ; CHECK:   renamable $vpr = MVE_VCTP32 renamable $r1, 2, killed renamable $vpr
   ; CHECK:   renamable $r0 = MVE_VSTRWU32_post renamable $q0, killed renamable $r0, 16, 2, killed renamable $vpr
-  ; CHECK:   renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
-  ; CHECK:   $lr = t2LEUpdate killed renamable $lr, %bb.2
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.2
   ; CHECK: bb.3.for.cond.cleanup:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   ;


        


More information about the llvm-commits mailing list