[llvm] a67eb22 - [RDA][ARM][LowOverheadLoops] Iteration count IT blocks

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 05:52:08 PST 2020


Author: Sam Parker
Date: 2020-02-24T13:51:03Z
New Revision: a67eb221e2281350eeab5dd4b9119895c500674c

URL: https://github.com/llvm/llvm-project/commit/a67eb221e2281350eeab5dd4b9119895c500674c
DIFF: https://github.com/llvm/llvm-project/commit/a67eb221e2281350eeab5dd4b9119895c500674c.diff

LOG: [RDA][ARM][LowOverheadLoops] Iteration count IT blocks

Change the way that we remove the redundant iteration count code in
the presence of IT blocks. collectLocalKilledOperands has been
introduced to scan an instruction's operands, collecting the killed
instructions and then visiting them too. This is used to delete the
code in the preheader which calculates the iteration count. We also
track any IT blocks within the preheader and, if we remove all the
instructions from the IT block, we also remove the IT instruction.
isSafeToRemove is used to remove any redundant uses of the iteration
count within the loop body.

Differential Revision: https://reviews.llvm.org/D74975

Added: 
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir

Modified: 
    llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
    llvm/lib/CodeGen/ReachingDefAnalysis.cpp
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
index 0b4b0007e946..8aea57f683b6 100644
--- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -164,6 +164,10 @@ class ReachingDefAnalysis : public MachineFunctionPass {
   /// Return whether From can be moved backwards to just after To.
   bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const;
 
+  /// Assuming MI is dead, recursively search the incoming operands which are
+  /// killed by MI and collect those that would become dead.
+  void collectLocalKilledOperands(MachineInstr *MI, InstSet &Dead) const;
+
   /// Return whether removing this instruction will have no effect on the
   /// program, returning the redundant use-def chain.
   bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const;

diff  --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index c14fa8c43b38..74707ff84149 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -472,6 +472,32 @@ ReachingDefAnalysis::isSafeToRemove(MachineInstr *MI, InstSet &Visited,
   return true;
 }
 
+void ReachingDefAnalysis::collectLocalKilledOperands(MachineInstr *MI,
+                                                     InstSet &Dead) const {
+  Dead.insert(MI);
+  auto IsDead = [this](MachineInstr *Def, int PhysReg) {
+    unsigned LiveDefs = 0;
+    for (auto &MO : Def->defs())
+      if (!MO.isDead())
+        ++LiveDefs;
+
+    if (LiveDefs > 1)
+      return false;
+
+    SmallPtrSet<MachineInstr*, 4> Uses;
+    getGlobalUses(Def, PhysReg, Uses);
+    return Uses.size() == 1;
+  };
+
+  for (auto &MO : MI->uses()) {
+    if (!MO.isReg() || MO.getReg() == 0 || !MO.isKill())
+      continue;
+    if (MachineInstr *Def = getReachingMIDef(MI, MO.getReg()))
+      if (IsDead(Def, MO.getReg()))
+        collectLocalKilledOperands(Def, Dead);
+  }
+}
+
 bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI,
                                            int PhysReg) const {
   SmallPtrSet<MachineInstr*, 1> Ignore;

diff  --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index f9b7b39b8fb3..3c63e9a544ed 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -897,65 +897,63 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
   if (!LoLoop.IsTailPredicationLegal())
     return;
 
-  if (auto *Def = RDA->getReachingMIDef(LoLoop.Start,
-                                        LoLoop.Start->getOperand(0).getReg())) {
-    SmallPtrSet<MachineInstr*, 4> Remove;
-    SmallPtrSet<MachineInstr*, 4> Ignore = { LoLoop.Start, LoLoop.Dec,
-                                             LoLoop.End, LoLoop.InsertPt };
-    SmallVector<MachineInstr*, 4> Chain = { Def };
-    while (!Chain.empty()) {
-      MachineInstr *MI = Chain.back();
-      Chain.pop_back();
-
-      // If an instruction is conditionally executed, we assume here that this
-      // an IT-block with just this single instruction in it, otherwise we
-      // continue and can't perform dead-code elimination on it. This will
-      // capture most cases, because the loop iteration count expression
-      // that performs a round-up to next multiple of the vector length will
-      // look like this:
-      //
-      //   %mull = ..
-      //   %0 = add i32 %mul, 3
-      //   %1 = icmp slt i32 %mul, 4
-      //   %smin = select i1 %1, i32 %mul, i32 4
-      //   %2 = sub i32 %0, %smin
-      //   %3 = lshr i32 %2, 2
-      //   %4 = add nuw nsw i32 %3, 1
-      //
-      // There can be a select instruction, checking if we need to execute only
-      // 1 vector iteration (in this examples that means 4 elements). Thus,
-      // we conditionally execute one instructions to materialise the iteration
-      // count.
-      MachineInstr *IT = nullptr;
-      if (TII->getPredicate(*MI) != ARMCC::AL) {
-        auto PrevMI = std::prev(MI->getIterator());
-        auto NextMI = std::next(MI->getIterator());
-
-        if (PrevMI->getOpcode() == ARM::t2IT &&
-            TII->getPredicate(*NextMI) == ARMCC::AL)
-          IT = &*PrevMI;
-        else
-          // We can't analyse IT-blocks with multiple statements. Be
-          // conservative here: clear the list, and don't remove any statements
-          // at all.
-          return;
-      }
+  auto *Def = RDA->getReachingMIDef(LoLoop.Start,
+                                    LoLoop.Start->getOperand(0).getReg());
+  if (!Def)
+    return;
 
-      if (RDA->isSafeToRemove(MI, Remove, Ignore)) {
-        for (auto &MO : MI->operands()) {
-          if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
-            continue;
-          if (auto *Op = RDA->getReachingMIDef(MI, MO.getReg()))
-            Chain.push_back(Op);
-        }
-        Ignore.insert(MI);
+  // Collect IT blocks.
+  std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
+  std::map<MachineInstr *, MachineInstr *> Predicates;
+  MachineInstr *IT = nullptr;
+  for (auto &MI : *Def->getParent()) {
+    if (MI.getOpcode() == ARM::t2IT)
+      IT = &MI;
+    else if (TII->getPredicate(MI) != ARMCC::AL) {
+      ITBlocks[IT].insert(&MI);
+      Predicates[&MI] = IT;
+    }
+  }
 
-        if (IT)
-          Remove.insert(IT);
-      }
+  // If we're removing all of the instructions within an IT block, then
+  // also remove the IT instruction.
+  SmallPtrSet<MachineInstr*, 2> ModifiedITs;
+  SmallPtrSet<MachineInstr*, 2> DeadITs;
+  SmallPtrSet<MachineInstr*, 4> Killed;
+  RDA->collectLocalKilledOperands(Def, Killed);
+  for (auto *MI : Killed) {
+    if (!Predicates.count(MI))
+      continue;
+
+    MachineInstr *IT = Predicates[MI];
+    auto &CurrentBlock = ITBlocks[IT];
+    CurrentBlock.erase(MI);
+    ModifiedITs.insert(IT);
+    if (CurrentBlock.empty()) {
+      DeadITs.insert(IT);
+      ModifiedITs.erase(IT);
     }
-    LoLoop.ToRemove.insert(Remove.begin(), Remove.end());
   }
+
+  // Delete the killed instructions only if we don't have any IT blocks that
+  // need to be modified because we need to fixup the mask.
+  // TODO: Handle cases where IT blocks are modified.
+  if (ModifiedITs.empty()) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Will remove iteration count:\n";
+               for (auto *MI : Killed)
+                 dbgs() << " - " << *MI;
+               for (auto *MI : DeadITs)
+                 dbgs() << " - " << *MI);
+    LoLoop.ToRemove.insert(Killed.begin(), Killed.end());
+    LoLoop.ToRemove.insert(DeadITs.begin(), DeadITs.end());
+  }
+
+  // Collect and remove the users of iteration count.
+  SmallPtrSet<MachineInstr*, 4> Ignore = { LoLoop.Start, LoLoop.Dec,
+                                           LoLoop.End, LoLoop.InsertPt };
+  SmallPtrSet<MachineInstr*, 2> Remove;
+  if (RDA->isSafeToRemove(Def, Remove, Ignore))
+    LoLoop.ToRemove.insert(Remove.begin(), Remove.end());
 }
 
 MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir
index 28e409a47d12..8c0d8dcffe3b 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-ignore-vctp.mir
@@ -95,12 +95,12 @@ body:             |
   ; CHECK-LABEL: name: dont_ignore_vctp
   ; CHECK: bb.0.entry:
   ; CHECK:   successors: %bb.1(0x80000000)
-  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r7
+  ; CHECK:   liveins: $lr, $r0, $r1, $r3, $r7
   ; CHECK:   frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
   ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
-  ; CHECK:   renamable $r3, dead $cpsr = tLSLri killed renamable $r2, 1, 14, $noreg
+  ; CHECK:   tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def dead $cpsr
   ; CHECK:   renamable $r2 = tLEApcrel %const.0, 14, $noreg
   ; CHECK:   renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
   ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir
index 5fcd4dee161c..41b8ef859e3a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain.mir
@@ -110,9 +110,8 @@ body:             |
   ; CHECK:   $r3 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit renamable $r12, implicit $itstate
   ; CHECK:   $r12 = t2LSLri renamable $r3, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
   ; CHECK:   renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
-  ; CHECK:   dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
+  ; CHECK:   dead renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
+  ; CHECK:   dead renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
   ; CHECK:   renamable $r2 = tLEApcrel %const.0, 14, $noreg
   ; CHECK:   renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
   ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir
index 10f4b58d4218..b42788c198cd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-itercount.mir
@@ -103,16 +103,9 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
-  ; CHECK:   renamable $r3, dead $cpsr = tLSLri renamable $r2, 1, 14, $noreg
-  ; CHECK:   renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
-  ; CHECK:   tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def $cpsr
-  ; CHECK:   t2IT 11, 8, implicit-def $itstate
-  ; CHECK:   $r12 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit killed renamable $r12, implicit $itstate
-  ; CHECK:   $r12 = t2LSLri renamable $r2, 1, 11, killed $cpsr, $noreg, implicit killed renamable $r12, implicit killed $itstate
-  ; CHECK:   renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
-  ; CHECK:   dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
+  ; CHECK:   renamable $r3, dead $cpsr = tLSLri killed renamable $r2, 1, 14, $noreg
+  ; CHECK:   dead renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
+  ; CHECK:   tCMPi8 renamable $r3, 4, 14, $noreg, implicit-def dead $cpsr
   ; CHECK:   renamable $r2 = tLEApcrel %const.0, 14, $noreg
   ; CHECK:   renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
   ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
new file mode 100644
index 000000000000..92a5113e6335
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -0,0 +1,218 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
+
+--- |
+  ; Function Attrs: nounwind
+  define hidden arm_aapcs_vfpcc void @cond_trip_count(float* %0, i32 %1, float* nocapture %2) local_unnamed_addr #1 {
+    ret void
+  }
+
+...
+---
+name:            cond_trip_count
+alignment:       4
+tracksRegLiveness: true
+registers:       []
+liveins:
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+frameInfo:
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites:       []
+constants:
+  - id:              0
+    value:           'float 0.000000e+00'
+    alignment:       4
+    isTargetSpecific: false
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: cond_trip_count
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r4
+  ; CHECK:   frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
+  ; CHECK:   tCMPi8 renamable $r1, 4, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   renamable $r3 = t2MOVi 4, 14, $noreg, $noreg
+  ; CHECK:   t2IT 11, 8, implicit-def $itstate
+  ; CHECK:   dead $r3 = tMOVr renamable $r1, 11, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
+  ; CHECK:   tCMPi8 renamable $r1, 2, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
+  ; CHECK:   tBcc %bb.2, 2, killed $cpsr
+  ; CHECK: bb.1:
+  ; CHECK:   liveins: $r2
+  ; CHECK:   renamable $s0 = VLDRS %const.0, 0, 14, $noreg
+  ; CHECK:   VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
+  ; CHECK:   tPOP_RET 14, $noreg, def $r4, def $pc
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   liveins: $r0, $r1, $r2, $r12
+  ; CHECK:   renamable $r4, dead $cpsr = tMOVi8 1, 14, $noreg
+  ; CHECK:   tCMPi8 renamable $r1, 4, 14, $noreg, implicit-def $cpsr
+  ; CHECK:   t2IT 11, 8, implicit-def $itstate
+  ; CHECK:   $r12 = tMOVr renamable $r1, 11, killed $cpsr, implicit killed renamable $r12, implicit killed $itstate
+  ; CHECK:   renamable $r3 = t2SUBrr renamable $r1, killed renamable $r12, 14, $noreg, $noreg
+  ; CHECK:   renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14, $noreg
+  ; CHECK:   $r12 = tMOVr $r1, 14, $noreg
+  ; CHECK:   dead renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
+  ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
+  ; CHECK:   $r3 = tMOVr $r0, 14, $noreg
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r12
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $r3
+  ; CHECK:   renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r3, 0, 0, $noreg
+  ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
+  ; CHECK:   renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14, $noreg
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.3
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   liveins: $q0, $r0, $r1, $r2
+  ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+  ; CHECK:   $r3 = tMOVr $r1, 14, $noreg
+  ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3
+  ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
+  ; CHECK:   renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit killed $q0
+  ; CHECK:   $s2 = VMOVSR $r1, 14, $noreg
+  ; CHECK:   renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
+  ; CHECK:   renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+  ; CHECK:   renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.5(0x7c000000), %bb.6(0x04000000)
+  ; CHECK:   liveins: $lr, $q0, $r0, $r1, $r2, $s4
+  ; CHECK:   $r4 = VMOVRS $s4, 14, $noreg
+  ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 0, $noreg
+  ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 0, $noreg, undef renamable $q2
+  ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg
+  ; CHECK:   renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 16, 14, $noreg
+  ; CHECK:   $lr = MVE_LETP killed renamable $lr, %bb.5
+  ; CHECK: bb.6:
+  ; CHECK:   liveins: $q0, $r1, $r2
+  ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+  ; CHECK:   renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14, $noreg
+  ; CHECK:   renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
+  ; CHECK:   renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit killed $q0
+  ; CHECK:   $s2 = VMOVSR killed $r0, 14, $noreg
+  ; CHECK:   renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
+  ; CHECK:   renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+  ; CHECK:   VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
+  ; CHECK:   tPOP_RET 14, $noreg, def $r4, def $pc
+  ; CHECK: bb.7 (align 4):
+  ; CHECK:   CONSTPOOL_ENTRY 0, %const.0, 4
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $r0, $r1, $r2, $r4, $lr
+
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r4, -8
+    tCMPi8 renamable $r1, 4, 14, $noreg, implicit-def $cpsr
+    renamable $r3 = t2MOVi 4, 14, $noreg, $noreg
+    t2IT 11, 8, implicit-def $itstate
+    $r3 = tMOVr renamable $r1, 11, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
+    tCMPi8 renamable $r1, 2, 14, $noreg, implicit-def $cpsr
+    renamable $r12 = t2MOVi 4, 14, $noreg, $noreg
+    tBcc %bb.2, 2, killed $cpsr
+
+  bb.1:
+    liveins: $r2
+
+    renamable $s0 = VLDRS %const.0, 0, 14, $noreg
+    VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $pc
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+    liveins: $r0, $r1, $r2, $r3, $r12
+
+    renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14, $noreg
+    renamable $r4, dead $cpsr = tMOVi8 1, 14, $noreg
+    renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14, $noreg
+    tCMPi8 renamable $r1, 4, 14, $noreg, implicit-def $cpsr
+    renamable $lr = nuw nsw t2ADDrs renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
+    t2IT 11, 8, implicit-def $itstate
+    $r12 = tMOVr renamable $r1, 11, killed $cpsr, implicit killed renamable $r12, implicit killed $itstate
+    renamable $r3 = t2SUBrr renamable $r1, killed renamable $r12, 14, $noreg, $noreg
+    renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14, $noreg
+    $r12 = tMOVr $r1, 14, $noreg
+    renamable $r4 = nuw nsw t2ADDrs killed renamable $r4, killed renamable $r3, 19, 14, $noreg, $noreg
+    renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
+    $r3 = tMOVr $r0, 14, $noreg
+    t2DoLoopStart renamable $lr
+
+  bb.3:
+    successors: %bb.3(0x7c000000), %bb.4(0x04000000)
+    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12
+
+    renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    MVE_VPST 4, implicit $vpr
+    renamable $q1 = nnan ninf nsz MVE_VLDRWU32 renamable $r3, 0, 1, renamable $vpr
+    renamable $q0 = nnan ninf nsz MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, renamable $q0
+    renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
+    renamable $r3, dead $cpsr = nuw tADDi8 killed renamable $r3, 16, 14, $noreg
+    t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr
+    tB %bb.4, 14, $noreg
+
+  bb.4:
+    successors: %bb.5(0x80000000)
+    liveins: $q0, $r0, $r1, $r2, $r4
+
+    renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+    $lr = tMOVr $r4, 14, $noreg
+    $r3 = tMOVr $r1, 14, $noreg
+    renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
+    renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+    $s2 = VMOVSR $r1, 14, $noreg
+    renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
+    t2DoLoopStart killed $r4
+    renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+    renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0
+
+  bb.5:
+    successors: %bb.5(0x7c000000), %bb.6(0x04000000)
+    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $s4
+
+    renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
+    $r4 = VMOVRS $s4, 14, $noreg
+    MVE_VPST 2, implicit $vpr
+    renamable $q2 = nnan ninf nsz MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr
+    renamable $q2 = nnan ninf nsz MVE_VSUB_qr_f32 killed renamable $q2, killed renamable $r4, 1, renamable $vpr, undef renamable $q2
+    renamable $q0 = nnan ninf nsz MVE_VFMAf32 killed renamable $q0, killed renamable $q2, renamable $q2, 1, killed renamable $vpr
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    renamable $r3, dead $cpsr = nsw tSUBi8 killed renamable $r3, 4, 14, $noreg
+    renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 16, 14, $noreg
+    t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr
+    tB %bb.6, 14, $noreg
+
+  bb.6:
+    liveins: $q0, $r1, $r2
+
+    renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+    renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14, $noreg
+    renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
+    renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+    $s2 = VMOVSR killed $r0, 14, $noreg
+    renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
+    renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+    VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $pc
+
+  bb.7 (align 4):
+    CONSTPOOL_ENTRY 0, %const.0, 4
+
+...

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir
index 128b0fe87b9b..d7f49456d810 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-random.mir
@@ -111,9 +111,8 @@ body:             |
   ; CHECK:   $r12 = t2LSLri renamable $r2, 1, 11, $cpsr, $noreg, implicit killed renamable $r12, implicit $itstate
   ; CHECK:   $r0 = t2ADDri killed renamable $r0, 42, 11, killed $cpsr, $noreg, implicit killed renamable $r0, implicit killed $itstate
   ; CHECK:   renamable $r2 = t2RSBrs killed renamable $r12, killed renamable $r2, 10, 14, $noreg, $noreg
-  ; CHECK:   renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
-  ; CHECK:   renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
-  ; CHECK:   dead renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $r12, 19, 14, $noreg, $noreg
+  ; CHECK:   dead renamable $r12 = t2ADDri killed renamable $r2, 3, 14, $noreg, $noreg
+  ; CHECK:   dead renamable $r2, dead $cpsr = tMOVi8 1, 14, $noreg
   ; CHECK:   renamable $r2 = tLEApcrel %const.0, 14, $noreg
   ; CHECK:   renamable $q0 = MVE_VLDRWU32 killed renamable $r2, 0, 0, $noreg :: (load 16 from constant-pool)
   ; CHECK:   $lr = MVE_DLSTP_32 killed renamable $r3


        


More information about the llvm-commits mailing list