[llvm] r372114 - [ARM] Fix for MVE load/store stack accesses

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 05:58:51 PDT 2019


Author: dmgreen
Date: Tue Sep 17 05:58:51 2019
New Revision: 372114

URL: http://llvm.org/viewvc/llvm-project?rev=372114&view=rev
Log:
[ARM] Fix for MVE load/store stack accesses

MVE loads and stores have a 7-bit immediate range, scaled by the length of the type. The stack estimation code needs to be taught this, to ensure that an emergency spill slot is reserved in case we run out of registers when materialising stack indices.

Also, the narrowing loads/stores can be created with frame indices even though they do not accept SP as a register. In those cases we need to make sure we have an emergency register to use as the frame base, as SP can never be used.

Differential Revision: https://reviews.llvm.org/D67327

Added:
    llvm/trunk/test/CodeGen/Thumb2/mve-stacksplot.mir
Modified:
    llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp?rev=372114&r1=372113&r2=372114&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMFrameLowering.cpp Tue Sep 17 05:58:51 2019
@@ -1503,8 +1503,12 @@ static unsigned EstimateFunctionSizeInBy
 /// instructions will require a scratch register during their expansion later.
 // FIXME: Move to TII?
 static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
-                                         const TargetFrameLowering *TFI) {
+                                         const TargetFrameLowering *TFI,
+                                         bool &HasNonSPFrameIndex) {
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const ARMBaseInstrInfo &TII =
+      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
   unsigned Limit = (1 << 12) - 1;
   for (auto &MBB : MF) {
     for (auto &MI : MBB) {
@@ -1519,6 +1523,11 @@ static unsigned estimateRSStackSizeLimit
           break;
         }
 
+        const MCInstrDesc &MCID = MI.getDesc();
+        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
+        if (RegClass && !RegClass->contains(ARM::SP))
+          HasNonSPFrameIndex = true;
+
         // Otherwise check the addressing mode.
         switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
         case ARMII::AddrMode3:
@@ -1541,6 +1550,15 @@ static unsigned estimateRSStackSizeLimit
           // Addressing modes 4 & 6 (load/store) instructions can't encode an
           // immediate offset for stack references.
           return 0;
+        case ARMII::AddrModeT2_i7:
+          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
+          break;
+        case ARMII::AddrModeT2_i7s2:
+          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
+          break;
+        case ARMII::AddrModeT2_i7s4:
+          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
+          break;
         default:
           break;
         }
@@ -1784,6 +1802,7 @@ void ARMFrameLowering::determineCalleeSa
   EstimatedStackSize += 16; // For possible paddings.
 
   unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
+  bool HasNonSPFrameIndex = false;
   if (AFI->isThumb1OnlyFunction()) {
     // For Thumb1, don't bother to iterate over the function. The only
     // instruction that requires an emergency spill slot is a store to a
@@ -1804,7 +1823,8 @@ void ARMFrameLowering::determineCalleeSa
       EstimatedRSStackSizeLimit = (1U << 8) * 4;
     EstimatedRSFixedSizeLimit = (1U << 5) * 4;
   } else {
-    EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this);
+    EstimatedRSStackSizeLimit =
+        estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
     EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
   }
   // Final estimate of whether sp or bp-relative accesses might require
@@ -1830,12 +1850,11 @@ void ARMFrameLowering::determineCalleeSa
       HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
 
   bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
-                         HasLargeArgumentList;
+                         HasLargeArgumentList || HasNonSPFrameIndex;
   LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
-                    << "; EstimatedStack" << EstimatedStackSize
-                    << "; EstimatedFPStack" << MaxFixedOffset - MaxFPOffset
-                    << "; BigFrameOffsets: " << BigFrameOffsets
-                    << "\n");
+                    << "; EstimatedStack: " << EstimatedStackSize
+                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
+                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
   if (BigFrameOffsets ||
       !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
     AFI->setHasStackFrame(true);

Added: llvm/trunk/test/CodeGen/Thumb2/mve-stacksplot.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/mve-stacksplot.mir?rev=372114&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-stacksplot.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/mve-stacksplot.mir Tue Sep 17 05:58:51 2019
@@ -0,0 +1,185 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s
+---
+name: func0
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -16, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: func0
+    ; CHECK: liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr
+    ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr
+    ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r11, -8
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36
+    ; CHECK: $sp = frame-setup tSUBspi $sp, 5, 14, $noreg
+    ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 56
+    ; CHECK: $r0 = IMPLICIT_DEF
+    ; CHECK: $r1 = IMPLICIT_DEF
+    ; CHECK: $r2 = IMPLICIT_DEF
+    ; CHECK: $r3 = IMPLICIT_DEF
+    ; CHECK: $r4 = IMPLICIT_DEF
+    ; CHECK: $r5 = IMPLICIT_DEF
+    ; CHECK: $r6 = IMPLICIT_DEF
+    ; CHECK: $r7 = IMPLICIT_DEF
+    ; CHECK: $r8 = IMPLICIT_DEF
+    ; CHECK: $r9 = IMPLICIT_DEF
+    ; CHECK: $r10 = IMPLICIT_DEF
+    ; CHECK: $r11 = IMPLICIT_DEF
+    ; CHECK: $r12 = IMPLICIT_DEF
+    ; CHECK: $lr = IMPLICIT_DEF
+    ; CHECK: t2STRi12 killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.1)
+    ; CHECK: $r0 = tMOVr killed $sp, 14, $noreg
+    ; CHECK: renamable $q2 = MVE_VLDRBU32 killed $r0, 16, 0, $noreg :: (load 4 from %stack.0 + 12)
+    ; CHECK: $r0 = t2LDRi12 $sp, 0, 14, $noreg :: (load 4 from %stack.1)
+    ; CHECK: KILL $r0
+    ; CHECK: KILL $r1
+    ; CHECK: KILL $r2
+    ; CHECK: KILL $r3
+    ; CHECK: KILL $r4
+    ; CHECK: KILL $r5
+    ; CHECK: KILL $r6
+    ; CHECK: KILL $r7
+    ; CHECK: KILL $r8
+    ; CHECK: KILL $r9
+    ; CHECK: KILL $r10
+    ; CHECK: KILL $r11
+    ; CHECK: KILL $r12
+    ; CHECK: KILL $lr
+    $r0 = IMPLICIT_DEF
+    $r1 = IMPLICIT_DEF
+    $r2 = IMPLICIT_DEF
+    $r3 = IMPLICIT_DEF
+    $r4 = IMPLICIT_DEF
+    $r5 = IMPLICIT_DEF
+    $r6 = IMPLICIT_DEF
+    $r7 = IMPLICIT_DEF
+    $r8 = IMPLICIT_DEF
+    $r9 = IMPLICIT_DEF
+    $r10 = IMPLICIT_DEF
+    $r11 = IMPLICIT_DEF
+    $r12 = IMPLICIT_DEF
+    $lr = IMPLICIT_DEF
+
+    renamable $q2 = MVE_VLDRBU32 %stack.0, 12, 0, $noreg :: (load 4 from %stack.0 + 12)
+
+    KILL $r0
+    KILL $r1
+    KILL $r2
+    KILL $r3
+    KILL $r4
+    KILL $r5
+    KILL $r6
+    KILL $r7
+    KILL $r8
+    KILL $r9
+    KILL $r10
+    KILL $r11
+    KILL $r12
+    KILL $lr
+...
+---
+name: func1
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -1200, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: '', type: default, offset: 0, size: 1200, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -1200, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: func1
+    ; CHECK: liveins: $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr
+    ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr
+    ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 36
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r11, -8
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -12
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -16
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -20
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -24
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -28
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -32
+    ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -36
+    ; CHECK: $sp = frame-setup t2SUBri killed $sp, 1216, 14, $noreg, $noreg
+    ; CHECK: $sp = frame-setup tSUBspi $sp, 1, 14, $noreg
+    ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 1256
+    ; CHECK: $r0 = IMPLICIT_DEF
+    ; CHECK: $r1 = IMPLICIT_DEF
+    ; CHECK: $r2 = IMPLICIT_DEF
+    ; CHECK: $r3 = IMPLICIT_DEF
+    ; CHECK: $r4 = IMPLICIT_DEF
+    ; CHECK: $r5 = IMPLICIT_DEF
+    ; CHECK: $r6 = IMPLICIT_DEF
+    ; CHECK: $r7 = IMPLICIT_DEF
+    ; CHECK: $r8 = IMPLICIT_DEF
+    ; CHECK: $r9 = IMPLICIT_DEF
+    ; CHECK: $r10 = IMPLICIT_DEF
+    ; CHECK: $r11 = IMPLICIT_DEF
+    ; CHECK: $r12 = IMPLICIT_DEF
+    ; CHECK: $lr = IMPLICIT_DEF
+    ; CHECK: t2STRi12 killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.2)
+    ; CHECK: $r0 = t2ADDri killed $sp, 1152, 14, $noreg, $noreg
+    ; CHECK: renamable $q2 = MVE_VLDRBU8 killed $r0, 52, 0, $noreg :: (load 4 from %stack.0)
+    ; CHECK: $r0 = t2LDRi12 $sp, 0, 14, $noreg :: (load 4 from %stack.2)
+    ; CHECK: KILL $r0
+    ; CHECK: KILL $r1
+    ; CHECK: KILL $r2
+    ; CHECK: KILL $r3
+    ; CHECK: KILL $r4
+    ; CHECK: KILL $r5
+    ; CHECK: KILL $r6
+    ; CHECK: KILL $r7
+    ; CHECK: KILL $r8
+    ; CHECK: KILL $r9
+    ; CHECK: KILL $r10
+    ; CHECK: KILL $r11
+    ; CHECK: KILL $r12
+    ; CHECK: KILL $lr
+    $r0 = IMPLICIT_DEF
+    $r1 = IMPLICIT_DEF
+    $r2 = IMPLICIT_DEF
+    $r3 = IMPLICIT_DEF
+    $r4 = IMPLICIT_DEF
+    $r5 = IMPLICIT_DEF
+    $r6 = IMPLICIT_DEF
+    $r7 = IMPLICIT_DEF
+    $r8 = IMPLICIT_DEF
+    $r9 = IMPLICIT_DEF
+    $r10 = IMPLICIT_DEF
+    $r11 = IMPLICIT_DEF
+    $r12 = IMPLICIT_DEF
+    $lr = IMPLICIT_DEF
+
+    renamable $q2 = MVE_VLDRBU8 %stack.0, 0, 0, $noreg :: (load 4 from %stack.0)
+
+    KILL $r0
+    KILL $r1
+    KILL $r2
+    KILL $r3
+    KILL $r4
+    KILL $r5
+    KILL $r6
+    KILL $r7
+    KILL $r8
+    KILL $r9
+    KILL $r10
+    KILL $r11
+    KILL $r12
+    KILL $lr
+...




More information about the llvm-commits mailing list