[llvm-branch-commits] [llvm] d9bf624 - [ARM] Revert low overhead loops with calls before register allocation.

David Green via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Dec 7 07:44:57 PST 2020


Author: David Green
Date: 2020-12-07T15:44:40Z
New Revision: d9bf6245bfef41ad7606f0e64e0c4f12d65a2b46

URL: https://github.com/llvm/llvm-project/commit/d9bf6245bfef41ad7606f0e64e0c4f12d65a2b46
DIFF: https://github.com/llvm/llvm-project/commit/d9bf6245bfef41ad7606f0e64e0c4f12d65a2b46.diff

LOG: [ARM] Revert low overhead loops with calls before register allocation.

This adds code to revert low overhead loops with calls in them before
register allocation. Ideally we would not create low overhead loops with
calls in them to begin with, but that can be difficult to always get
correct. If we want to try to glue t2LoopDec and t2LoopEnd together
into a single instruction, we need to ensure that no instructions in the
loop use LR. (Technically the final code can be better too, as it does
not need to use the same registers, but that has not been optimized for
here, as reverting loops with calls is expected to be very rare.)
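
For illustration, a rough before/after of what reverting looks like at the
MIR level (register names are simplified here; the new revertcallearly.mir
test below shows the exact sequence):

    ; Before: low overhead loop pseudo instructions
    %lr = t2DoLoopStart %count
    ...
    %lr.next = t2LoopDec %lr, 1
    t2LoopEnd %lr.next, %loop.header

    ; After reverting (RevertDoLoopStart / RevertLoopDec / RevertLoopEnd)
    %lr = tMOVr %count
    ...
    %lr.next = t2SUBri %lr, 1
    t2CMPri %lr.next, 0
    t2Bcc %loop.header    ; CC::ne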

It also adds an MVETailPredUtils.h header to share the revert code
between different passes, and provides a place to expand upon, with
RevertLoopWithCall becoming a natural home for other low overhead loop
alterations such as removing copies or combining LoopDec and LoopEnd into
a single instruction.
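
As a rough sketch of how a pass can pull these shared helpers in (everything
here other than the Revert* functions themselves is illustrative and not
part of this patch):

    #include "MVETailPredUtils.h"

    // Sketch: revert the loop pseudos back to ordinary Thumb-2 compare/branch
    // code once something (e.g. a call) makes the low overhead loop unusable.
    static void revertLowOverheadLoop(MachineInstr *Start, MachineInstr *Dec,
                                      MachineInstr *End,
                                      const TargetInstrInfo *TII) {
      if (Start->getOpcode() == ARM::t2WhileLoopStart)
        RevertWhileLoopStart(Start, TII); // cmp <reg>, #0; beq <exit>
      else
        RevertDoLoopStart(Start, TII);    // mov lr, <count>
      RevertLoopDec(Dec, TII);            // sub lr, lr, #1
      RevertLoopEnd(End, TII);            // cmp lr, #0; bne <loop header>
    }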

Differential Revision: https://reviews.llvm.org/D91273

Added: 
    llvm/lib/Target/ARM/MVETailPredUtils.h
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir

Modified: 
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/lib/Target/ARM/ARMBaseInstrInfo.h
    llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6426d7d85dcd..f095397ec3f9 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"

diff  --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 461a83693c79..234e8db88d26 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -614,56 +614,6 @@ unsigned VCMPOpcodeToVPT(unsigned Opcode) {
   }
 }
 
-static inline
-unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("unhandled vctp opcode");
-    break;
-  case ARM::MVE_VCTP8:
-    return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
-  case ARM::MVE_VCTP16:
-    return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
-  case ARM::MVE_VCTP32:
-    return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
-  case ARM::MVE_VCTP64:
-    return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
-  }
-  return 0;
-}
-
-static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    llvm_unreachable("unhandled vctp opcode");
-  case ARM::MVE_VCTP8:  return 16;
-  case ARM::MVE_VCTP16: return 8;
-  case ARM::MVE_VCTP32: return 4;
-  case ARM::MVE_VCTP64: return 2;
-  }
-  return 0;
-}
-
-static inline bool isVCTP(const MachineInstr *MI) {
-  switch (MI->getOpcode()) {
-  default:
-    break;
-  case ARM::MVE_VCTP8:
-  case ARM::MVE_VCTP16:
-  case ARM::MVE_VCTP32:
-  case ARM::MVE_VCTP64:
-    return true;
-  }
-  return false;
-}
-
-static inline
-bool isLoopStart(MachineInstr &MI) {
-  return MI.getOpcode() == ARM::t2DoLoopStart ||
-         MI.getOpcode() == ARM::t2DoLoopStartTP ||
-         MI.getOpcode() == ARM::t2WhileLoopStart;
-}
-
 static inline
 bool isCondBranchOpcode(int Opc) {
   return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;

diff  --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 0f0418901bec..6901272496a0 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -56,6 +56,7 @@
 #include "ARMBaseRegisterInfo.h"
 #include "ARMBasicBlockInfo.h"
 #include "ARMSubtarget.h"
+#include "MVETailPredUtils.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallSet.h"
@@ -1310,33 +1311,16 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
 // another low register.
 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                    TII->get(ARM::t2CMPri));
-  MIB.add(MI->getOperand(0));
-  MIB.addImm(0);
-  MIB.addImm(ARMCC::AL);
-  MIB.addReg(ARM::NoRegister);
-
   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
     ARM::tBcc : ARM::t2Bcc;
 
-  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
-  MIB.add(MI->getOperand(1));   // branch target
-  MIB.addImm(ARMCC::EQ);        // condition code
-  MIB.addReg(ARM::CPSR);
-  MI->eraseFromParent();
+  RevertWhileLoopStart(MI, TII, BrOpc);
 }
 
 void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
-  MachineBasicBlock *MBB = MI->getParent();
-  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
-      .add(MI->getOperand(0))
-      .add(MI->getOperand(1))
-      .add(predOps(ARMCC::AL));
-  MI->eraseFromParent();
+  RevertDoLoopStart(MI, TII);
 }
 
 bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
@@ -1354,21 +1338,7 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
   bool SetFlags =
       RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
 
-  MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                    TII->get(ARM::t2SUBri));
-  MIB.addDef(ARM::LR);
-  MIB.add(MI->getOperand(1));
-  MIB.add(MI->getOperand(2));
-  MIB.addImm(ARMCC::AL);
-  MIB.addReg(0);
-
-  if (SetFlags) {
-    MIB.addReg(ARM::CPSR);
-    MIB->getOperand(5).setIsDef(true);
-  } else
-    MIB.addReg(0);
-
-  MI->eraseFromParent();
+  llvm::RevertLoopDec(MI, TII, SetFlags);
   return SetFlags;
 }
 
@@ -1376,28 +1346,11 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
 
-  MachineBasicBlock *MBB = MI->getParent();
-  // Create cmp
-  if (!SkipCmp) {
-    MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                      TII->get(ARM::t2CMPri));
-    MIB.addReg(ARM::LR);
-    MIB.addImm(0);
-    MIB.addImm(ARMCC::AL);
-    MIB.addReg(ARM::NoRegister);
-  }
-
   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
     ARM::tBcc : ARM::t2Bcc;
 
-  // Create bne
-  MachineInstrBuilder MIB =
-    BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
-  MIB.add(MI->getOperand(1));   // branch target
-  MIB.addImm(ARMCC::NE);        // condition code
-  MIB.addReg(ARM::CPSR);
-  MI->eraseFromParent();
+  llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
 }
 
 // Perform dead code elimation on the loop iteration count setup expression.

diff  --git a/llvm/lib/Target/ARM/MVETailPredUtils.h b/llvm/lib/Target/ARM/MVETailPredUtils.h
new file mode 100644
index 000000000000..9ab5d92729fe
--- /dev/null
+++ b/llvm/lib/Target/ARM/MVETailPredUtils.h
@@ -0,0 +1,157 @@
+//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains utility functions for low overhead and tail predicated
+// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
+// needs them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+namespace llvm {
+
+static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unhandled vctp opcode");
+    break;
+  case ARM::MVE_VCTP8:
+    return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
+  case ARM::MVE_VCTP16:
+    return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
+  case ARM::MVE_VCTP32:
+    return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
+  case ARM::MVE_VCTP64:
+    return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
+  }
+  return 0;
+}
+
+static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("unhandled vctp opcode");
+  case ARM::MVE_VCTP8:
+    return 16;
+  case ARM::MVE_VCTP16:
+    return 8;
+  case ARM::MVE_VCTP32:
+    return 4;
+  case ARM::MVE_VCTP64:
+    return 2;
+  }
+  return 0;
+}
+
+static inline bool isVCTP(const MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case ARM::MVE_VCTP8:
+  case ARM::MVE_VCTP16:
+  case ARM::MVE_VCTP32:
+  case ARM::MVE_VCTP64:
+    return true;
+  }
+  return false;
+}
+
+static inline bool isLoopStart(MachineInstr &MI) {
+  return MI.getOpcode() == ARM::t2DoLoopStart ||
+         MI.getOpcode() == ARM::t2DoLoopStartTP ||
+         MI.getOpcode() == ARM::t2WhileLoopStart;
+}
+
+// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+// beq that branches to the exit branch.
+inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
+                        unsigned BrOpc = ARM::t2Bcc) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Cmp
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+  MIB.add(MI->getOperand(0));
+  MIB.addImm(0);
+  MIB.addImm(ARMCC::AL);
+  MIB.addReg(ARM::NoRegister);
+
+  // Branch
+  MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+  MIB.add(MI->getOperand(1)); // branch target
+  MIB.addImm(ARMCC::EQ);      // condition code
+  MIB.addReg(ARM::CPSR);
+
+  MI->eraseFromParent();
+}
+
+inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
+  MachineBasicBlock *MBB = MI->getParent();
+  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1))
+      .add(predOps(ARMCC::AL));
+
+  MI->eraseFromParent();
+}
+
+inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
+                          bool SetFlags = false) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
+  MIB.add(MI->getOperand(0));
+  MIB.add(MI->getOperand(1));
+  MIB.add(MI->getOperand(2));
+  MIB.addImm(ARMCC::AL);
+  MIB.addReg(0);
+
+  if (SetFlags) {
+    MIB.addReg(ARM::CPSR);
+    MIB->getOperand(5).setIsDef(true);
+  } else
+    MIB.addReg(0);
+
+  MI->eraseFromParent();
+}
+
+// Generate a subs, or sub and cmp, and a branch instead of an LE.
+inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
+                          unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Create cmp
+  if (!SkipCmp) {
+    MachineInstrBuilder MIB =
+        BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
+    MIB.add(MI->getOperand(0));
+    MIB.addImm(0);
+    MIB.addImm(ARMCC::AL);
+    MIB.addReg(ARM::NoRegister);
+  }
+
+  // Create bne
+  MachineInstrBuilder MIB =
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
+  MIB.add(MI->getOperand(1)); // branch target
+  MIB.addImm(ARMCC::NE);      // condition code
+  MIB.addReg(ARM::CPSR);
+  MI->eraseFromParent();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H

diff  --git a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
index 9cac8a8e1162..ee3821d34025 100644
--- a/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVEVPTOptimisationsPass.cpp
@@ -18,6 +18,7 @@
 #include "ARM.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,6 +59,7 @@ class MVEVPTOptimisations : public MachineFunctionPass {
   }
 
 private:
+  bool RevertLoopWithCall(MachineLoop *ML);
   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
                                             MachineInstr &Instr,
@@ -156,6 +158,31 @@ static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
   return true;
 }
 
+bool MVEVPTOptimisations::RevertLoopWithCall(MachineLoop *ML) {
+  LLVM_DEBUG(dbgs() << "RevertLoopWithCall on loop " << ML->getHeader()->getName()
+                    << "\n");
+
+  MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
+  if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
+    return false;
+
+  // Check if there is an illegal instruction (a call) in the low overhead loop
+  // and if so revert it now before we get any further.
+  for (MachineBasicBlock *MBB : ML->blocks()) {
+    for (MachineInstr &MI : *MBB) {
+      if (MI.isCall()) {
+        LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
+        RevertDoLoopStart(LoopStart, TII);
+        RevertLoopDec(LoopDec, TII);
+        RevertLoopEnd(LoopEnd, TII);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
 // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
 // instruction, making the backend ARMLowOverheadLoops passes job of finding the
@@ -662,7 +689,7 @@ bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
   const ARMSubtarget &STI =
       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
 
-  if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
+  if (!STI.isThumb2() || !STI.hasLOB())
     return false;
 
   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
@@ -674,8 +701,10 @@ bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
                     << "********** Function: " << Fn.getName() << '\n');
 
   bool Modified = false;
-  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder())
+  for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
+    Modified |= RevertLoopWithCall(ML);
     Modified |= ConvertTailPredLoop(ML, DT);
+  }
 
   for (MachineBasicBlock &MBB : Fn) {
     Modified |= ReplaceVCMPsByVPNOTs(MBB);

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
index 3c37c4a14b71..21cb00cc8bb2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-default.mir
@@ -272,7 +272,7 @@ body:             |
   ; CHECK:   renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg
   ; CHECK:   early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39)
-  ; CHECK:   dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK:   dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK:   renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r0 = tMOVr $r7, 14 /* CC::al */, $noreg
   ; CHECK:   $r4 = tMOVr $r5, 14 /* CC::al */, $noreg

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
index 607cd788930b..14127639be29 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/biquad-cascade-optsize-strd-lr.mir
@@ -258,7 +258,7 @@ body:             |
   ; CHECK:   renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2)
   ; CHECK:   renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38)
-  ; CHECK:   dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK:   dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
   ; CHECK:   renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3)
   ; CHECK:   $r8 = tMOVr $r5, 14 /* CC::al */, $noreg

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
index fc914f7920f1..60ee6d61d3ff 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-dec-copy-chain.mir
@@ -296,7 +296,7 @@ body:             |
   ; CHECK:   renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg
   ; CHECK:   tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1)
-  ; CHECK:   t2CMPri killed $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2CMPri killed renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.4, 1 /* CC::ne */, killed $cpsr
   ; CHECK:   tB %bb.5, 14 /* CC::al */, $noreg
   ; CHECK: bb.5.bb13:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
index a7c1876d9c5d..8f195cefa50e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
@@ -8,7 +8,7 @@
 # CHECK:   tBcc %bb.4, 0
 # CHECK:   tB %bb.2
 # CHECK: bb.3.while.body:
-# CHECK:   t2CMPri $lr, 0, 14
+# CHECK:   t2CMPri renamable $lr, 0, 14
 # CHECK:   tBcc %bb.3, 1
 # CHECK:   tB %bb.4
 # CHECK: bb.4.while.end:

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir
new file mode 100644
index 000000000000..2b0ca2645486
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revertcallearly.mir
@@ -0,0 +1,145 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+lob -run-pass=arm-mve-vpt-opts --verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  @d = local_unnamed_addr global i32 0, align 4
+  @c = local_unnamed_addr global [1 x i32] zeroinitializer, align 4
+
+  define i32 @e() optsize {
+  entry:
+    %.pr = load i32, i32* @d, align 4
+    %cmp13 = icmp sgt i32 %.pr, -1
+    br i1 %cmp13, label %for.cond1.preheader.preheader, label %for.end9
+
+  for.cond1.preheader.preheader:                    ; preds = %entry
+    %0 = add i32 %.pr, 1
+    %1 = call i32 @llvm.start.loop.iterations.i32(i32 %0)
+    br label %for.cond1.preheader
+
+  for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
+    %2 = phi i32 [ %1, %for.cond1.preheader.preheader ], [ %3, %for.cond1.preheader ]
+    call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(24) bitcast ([1 x i32]* @c to i8*), i8 0, i32 24, i1 false)
+    %3 = call i32 @llvm.loop.decrement.reg.i32(i32 %2, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.cond1.preheader, label %for.cond.for.end9_crit_edge
+
+  for.cond.for.end9_crit_edge:                      ; preds = %for.cond1.preheader
+    store i32 -1, i32* @d, align 4
+    br label %for.end9
+
+  for.end9:                                         ; preds = %for.cond.for.end9_crit_edge, %entry
+    ret i32 undef
+  }
+
+  declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
+  declare i32 @llvm.start.loop.iterations.i32(i32)
+  declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
+
+...
+---
+name:            e
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gprnopc, preferred-register: '' }
+  - { id: 1, class: gpr, preferred-register: '' }
+  - { id: 2, class: gprlr, preferred-register: '' }
+  - { id: 3, class: gpr, preferred-register: '' }
+  - { id: 4, class: rgpr, preferred-register: '' }
+  - { id: 5, class: rgpr, preferred-register: '' }
+  - { id: 6, class: gprlr, preferred-register: '' }
+  - { id: 7, class: rgpr, preferred-register: '' }
+  - { id: 8, class: rgpr, preferred-register: '' }
+  - { id: 9, class: gprlr, preferred-register: '' }
+  - { id: 10, class: gprlr, preferred-register: '' }
+  - { id: 11, class: rgpr, preferred-register: '' }
+  - { id: 12, class: rgpr, preferred-register: '' }
+  - { id: 13, class: gpr, preferred-register: '' }
+liveins:         []
+body:             |
+  ; CHECK-LABEL: name: e
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x50000000), %bb.4(0x30000000)
+  ; CHECK:   [[t2MOVi32imm:%[0-9]+]]:rgpr = t2MOVi32imm @d
+  ; CHECK:   [[t2LDRi12_:%[0-9]+]]:gprnopc = t2LDRi12 [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
+  ; CHECK:   t2CMPri [[t2LDRi12_]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
+  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
+  ; CHECK: bb.1.for.cond1.preheader.preheader:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[t2LDRi12_]], 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   [[tMOVr:%[0-9]+]]:gprlr = tMOVr killed [[t2ADDri]], 14 /* CC::al */, $noreg
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr = COPY [[tMOVr]]
+  ; CHECK:   [[t2MOVi32imm1:%[0-9]+]]:rgpr = t2MOVi32imm @c
+  ; CHECK:   [[t2MOVi:%[0-9]+]]:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK: bb.2.for.cond1.preheader:
+  ; CHECK:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gprlr = PHI [[COPY]], %bb.1, %3, %bb.2
+  ; CHECK:   ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+  ; CHECK:   $r0 = COPY [[t2MOVi32imm1]]
+  ; CHECK:   $r1 = COPY [[t2MOVi]]
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
+  ; CHECK:   ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+  ; CHECK:   [[t2SUBri:%[0-9]+]]:gprlr = t2SUBri [[PHI]], 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr = COPY [[t2SUBri]]
+  ; CHECK:   t2CMPri [[t2SUBri]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
+  ; CHECK: bb.3.for.cond.for.end9_crit_edge:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[t2MOVi1:%[0-9]+]]:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   t2STRi12 killed [[t2MOVi1]], [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
+  ; CHECK: bb.4.for.end9:
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
+  ; CHECK:   $r0 = COPY [[DEF]]
+  ; CHECK:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+
+    %4:rgpr = t2MOVi32imm @d
+    %0:gprnopc = t2LDRi12 %4, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
+    t2CMPri %0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
+    t2B %bb.1, 14 /* CC::al */, $noreg
+
+  bb.1.for.cond1.preheader.preheader:
+    successors: %bb.2(0x80000000)
+
+    %5:rgpr = t2ADDri %0, 1, 14 /* CC::al */, $noreg, $noreg
+    %6:gprlr = t2DoLoopStart killed %5
+    %1:gpr = COPY %6
+    %7:rgpr = t2MOVi32imm @c
+    %8:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
+
+  bb.2.for.cond1.preheader:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+
+    %2:gprlr = PHI %1, %bb.1, %3, %bb.2
+    ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    $r0 = COPY %7
+    $r1 = COPY %8
+    tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
+    ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    %9:gprlr = t2LoopDec %2, 1
+    %3:gpr = COPY %9
+    t2LoopEnd %9, %bb.2, implicit-def dead $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.3.for.cond.for.end9_crit_edge:
+    successors: %bb.4(0x80000000)
+
+    %12:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
+    t2STRi12 killed %12, %4, 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
+
+  bb.4.for.end9:
+    %13:gpr = IMPLICIT_DEF
+    $r0 = COPY %13
+    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...

diff  --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
index f3589590d12b..4ba73cd288f3 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir
@@ -113,7 +113,7 @@ body:             |
   ; CHECK:   renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2 /* CC::hs */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
   ; CHECK:   early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep4)
   ; CHECK:   renamable $lr = tMOVr killed $lr, 14 /* CC::al */, $noreg
-  ; CHECK:   t2CMPri $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2CMPri renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
   ; CHECK:   tB %bb.2, 14 /* CC::al */, $noreg
   ; CHECK: bb.2.while.end:


        


More information about the llvm-branch-commits mailing list