[llvm] ea834c8 - Revert "[AArch64][SVE] Allow accesses to SVE stack objects to use frame pointer"

Thu Mar 11 05:33:41 PST 2021

Author: Bradley Smith
Date: 2021-03-11T13:32:35Z
New Revision: ea834c8365ca7c4f0b491f5de98c521df9401ea9

URL: https://github.com/llvm/llvm-project/commit/ea834c8365ca7c4f0b491f5de98c521df9401ea9
DIFF: https://github.com/llvm/llvm-project/commit/ea834c8365ca7c4f0b491f5de98c521df9401ea9.diff

LOG: Revert "[AArch64][SVE] Allow accesses to SVE stack objects to use frame pointer"

This patch introduced codegen faults.  An attempt to fix this was done
in https://reviews.llvm.org/D97193, but ultimately it was decided to
approach this differently.

This reverts commit 42635856ed3c9a05957640f9deb50cf865c03825.

Differential Revision: https://reviews.llvm.org/D98350

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
    llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
    llvm/test/CodeGen/AArch64/framelayout-sve.mir
    llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index e2331e83b384..f5df1c5e2929 100644

--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -352,7 +352,6 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
 /// pointer register.
 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
   // Win64 EH requires a frame pointer if funclets are present, as the locals
   // are accessed off the frame pointer in both the parent function and the
@@ -377,14 +376,6 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
   if (!MFI.isMaxCallFrameSizeComputed() ||
       MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
     return true;
-  // If there are both SVE and non-SVE objects on the stack, make the frame
-  // pointer available since it may be more performant to use it.
-  uint64_t CalleeStackSize = AFI->isCalleeSavedStackSizeComputed()
-                                 ? AFI->getCalleeSavedStackSize()
-                                 : 0;
-  uint64_t NonSVEStackSize = MFI.getStackSize() - CalleeStackSize;
-  if (AFI->getStackSizeSVE() && NonSVEStackSize)
-    return true;
 
   return false;
 }
@@ -1980,6 +1971,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
   // right thing for the emergency spill slot.
   bool UseFP = false;
   if (AFI->hasStackFrame() && !isSVE) {
+    // We shouldn't prefer using the FP when there is an SVE area
+    // in between the FP and the non-SVE locals/spills.
+    PreferFP &= !SVEStackSize;
+
     // Note: Keeping the following as multiple 'if' statements rather than
     // merging to a single expression for readability.
     //
@@ -2000,10 +1995,6 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
       bool FPOffsetFits = !ForSimm || FPOffset >= -256;
       PreferFP |= Offset > -FPOffset;
 
-      // The FP offset will not fit if there is an SVE area in the way.
-      if (SVEStackSize && FPOffset < 0)
-        FPOffsetFits = false;
-
       if (MFI.hasVarSizedObjects()) {
         // If we have variable sized objects, we can use either FP or BP, as the
         // SP offset is unknown. We can use the base pointer if we have one and

diff  --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 0298d9e5c358..f60e2b6c316e 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -248,10 +248,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
     return getCalleeSavedStackSize();
   }
 
-  bool isCalleeSavedStackSizeComputed() const {
-    return HasCalleeSavedStackSize;
-  }
-
   unsigned getCalleeSavedStackSize() const {
     assert(HasCalleeSavedStackSize &&
            "CalleeSavedStackSize has not been calculated");

diff  --git a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
index 3e65e40449ee..75917ef32ae2 100644
--- a/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
+++ b/llvm/test/CodeGen/AArch64/debug-info-sve-dbg-value.mir
@@ -12,16 +12,16 @@
 # CHECK1: : DW_OP_breg31 WSP+16)
 # CHECK1: DW_AT_type {{.*}}ty32
 #
-# CHECK2: : DW_OP_breg29 W29+0, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
+# CHECK2: : DW_OP_breg31 WSP+16, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
 # CHECK2: DW_AT_type {{.*}}svint32_t
 #
-# CHECK3: : DW_OP_breg29 W29+0, DW_OP_lit16, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
+# CHECK3: : DW_OP_breg31 WSP+16, DW_OP_lit8, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
 # CHECK3: DW_AT_type {{.*}}svint32_t
 #
-# CHECK4: : DW_OP_breg29 W29+0, DW_OP_lit17, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
+# CHECK4: : DW_OP_breg31 WSP+16, DW_OP_lit7, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
 # CHECK4: DW_AT_type {{.*}}svbool_t
 #
-# CHECK5: : DW_OP_breg29 W29+0, DW_OP_lit18, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_minus)
+# CHECK5: : DW_OP_breg31 WSP+16, DW_OP_lit6, DW_OP_bregx VG+0, DW_OP_mul, DW_OP_plus)
 # CHECK5: DW_AT_type {{.*}}svbool_t
 
 --- |

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 3418228c3501..b3d17cf17bbe 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -57,7 +57,6 @@
 
 # CHECK:      bb.0.entry:
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
-# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION
@@ -68,9 +67,11 @@
 # CHECK-NEXT: RET_ReallyLR
 
 # ASM-LABEL: test_allocate_sve:
-# ASM:       .cfi_offset w29, -16
+# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 16 * VG
+# ASM-NEXT:  .cfi_offset w29, -16
 #
-# UNWINDINFO: DW_CFA_offset: reg29 -16
+# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -16
 name:            test_allocate_sve
 stack:
   - { id: 0, stack-id: scalable-vector, size: 18, alignment: 2 }
@@ -95,7 +96,6 @@ body:             |
 # CHECK:      bb.0.entry:
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32
 # CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2
-# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
 # CHECK-COUNT-4: frame-setup CFI_INSTRUCTION
@@ -109,11 +109,13 @@ body:             |
 # CHECK-NEXT: RET_ReallyLR
 #
 # ASM-LABEL: test_allocate_sve_gpr_callee_saves:
-# ASM:       .cfi_offset w20, -8
+# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 16 * VG
+# ASM-NEXT:  .cfi_offset w20, -8
 # ASM-NEXT:  .cfi_offset w21, -16
 # ASM-NEXT:  .cfi_offset w29, -32
 #
-# UNWINDINFO:       DW_CFA_offset: reg20 -8
+# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT:  DW_CFA_offset: reg20 -8
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg21 -16
 # UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -32
 name:            test_allocate_sve_gpr_callee_saves
@@ -182,14 +184,16 @@ body:             |
 
 # CHECK:      bb.0.entry:
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
-# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION
 
-# CHECK-NEXT: STR_ZXI $z0, $fp, -1
-# CHECK-NEXT: STR_ZXI $z1, $fp, -2
-# CHECK-NEXT: STR_PXI $p0, $fp, -17
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7
 
 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
@@ -197,9 +201,11 @@ body:             |
 # CHECK-NEXT: RET_ReallyLR
 #
 # ASM-LABEL:  test_address_sve:
-# ASM:        .cfi_offset w29, -16
+# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 24 * VG
+# ASM-NEXT:  .cfi_offset w29, -16
 #
-# UNWINDINFO: DW_CFA_offset: reg29 -16
+# UNWINDINFO:       DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT:  DW_CFA_offset: reg29 -16
 
 name:            test_address_sve
 frameInfo:
@@ -292,12 +298,12 @@ body:             |
 
 # CHECK:      bb.0.entry:
 # CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
-# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
 # CHECK-COUNT-2: frame-setup CFI_INSTRUCTION
 
-# CHECK-NEXT: $x0 = LDRXui $fp, 2
+# CHECK:      $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
+# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
 
 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1
 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
@@ -305,9 +311,11 @@ body:             |
 # CHECK-NEXT: RET_ReallyLR
 #
 # ASM-LABEL: test_stack_arg_sve:
-# ASM:       .cfi_offset w29, -16
+# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
+# ASM-NEXT:  .cfi_offset w29, -16
 #
-# UNWINDINFO: DW_CFA_offset: reg29 -16
+# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +32, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
 
 name:             test_stack_arg_sve
 fixedStack:
@@ -452,9 +460,11 @@ body:             |
 # CHECK: RET_ReallyLR
 #
 # ASM-LABEL: save_restore_pregs_sve:
-# ASM:       .cfi_offset w29, -16
+# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 8 * VG
+# ASM-NEXT:  .cfi_offset w29, -16
 #
-# UNWINDINFO: DW_CFA_offset: reg29 -16
+# UNWINDINFO:         DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT:    DW_CFA_offset: reg29 -16
 name: save_restore_pregs_sve
 stack:
   - { id: 0, stack-id: default, size: 32, alignment: 16 }
@@ -470,7 +480,6 @@ body:             |
 ...
 # CHECK-LABEL: name: save_restore_zregs_sve
 # CHECK:      $sp = frame-setup STRXpre killed $fp, $sp, -16
-# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
 # CHECK-NEXT: frame-setup STR_ZXI killed $z10, $sp, 0
 # CHECK-NEXT: frame-setup STR_ZXI killed $z9, $sp, 1
@@ -487,11 +496,13 @@ body:             |
 # CHECK-NEXT: RET_ReallyLR
 #
 # ASM-LABEL: save_restore_zregs_sve:
-# ASM:       .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8  @ cfa - 16 - 8 * VG
+# ASM:       .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 48 + 24 * VG
+# ASM-NEXT:  .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8  @ cfa - 16 - 8 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9  @ cfa - 16 - 16 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10  @ cfa - 16 - 24 * VG
 
-# UNWINDINFO:      DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +48, DW_OP_plus, DW_OP_consts +24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -16, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
@@ -547,7 +558,8 @@ body:             |
 # CHECK: RET_ReallyLR
 #
 # ASM-LABEL: save_restore_sve:
-# ASM:       .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
+# ASM:       .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x98, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 64 + 152 * VG
+# ASM-NEXT:  .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
 # ASM-NEXT:  .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 32 - 32 * VG
@@ -560,7 +572,8 @@ body:             |
 # ASM-NEXT:  .cfi_offset w21, -24
 # ASM-NEXT:  .cfi_offset w29, -32
 #
-# UNWINDINFO:      DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO:      DW_CFA_def_cfa_expression: DW_OP_breg31 +0, DW_OP_consts +64, DW_OP_plus, DW_OP_consts +152, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
+# UNWINDINFO-NEXT: DW_CFA_expression: reg72 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -8, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg73 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -16, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg74 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -24, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus
 # UNWINDINFO-NEXT: DW_CFA_expression: reg75 DW_OP_consts -32, DW_OP_plus, DW_OP_consts -32, DW_OP_bregx 0x2e +0, DW_OP_mul, DW_OP_plus

diff  --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
index f6bcb7e2e2d2..806dd7e57dee 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -41,18 +41,15 @@ define float @foo2(double* %x0, double* %x1) nounwind {
 ; CHECK-LABEL: foo2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
-; CHECK-NEXT:    mov x29, sp
 ; CHECK-NEXT:    addvl sp, sp, #-4
+; CHECK-NEXT:    sub sp, sp, #16 // =16
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
 ; CHECK-NEXT:    ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    addvl x8, x29, #-4
+; CHECK-NEXT:    add x8, sp, #16 // =16
+; CHECK-NEXT:    add x9, sp, #16 // =16
 ; CHECK-NEXT:    fmov s0, #1.00000000
-; CHECK-NEXT:    st1d { z16.d }, p0, [x29, #-4, mul vl]
-; CHECK-NEXT:    st1d { z17.d }, p0, [x8, #1, mul vl]
-; CHECK-NEXT:    st1d { z18.d }, p0, [x8, #2, mul vl]
-; CHECK-NEXT:    st1d { z19.d }, p0, [x8, #3, mul vl]
 ; CHECK-NEXT:    mov w1, #1
 ; CHECK-NEXT:    mov w2, #2
 ; CHECK-NEXT:    mov w3, #3
@@ -60,8 +57,12 @@ define float @foo2(double* %x0, double* %x1) nounwind {
 ; CHECK-NEXT:    mov w5, #5
 ; CHECK-NEXT:    mov w6, #6
 ; CHECK-NEXT:    mov w7, #7
-; CHECK-NEXT:    str x8, [sp, #-16]!
 ; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    st1d { z16.d }, p0, [x9]
+; CHECK-NEXT:    st1d { z17.d }, p0, [x8, #1, mul vl]
+; CHECK-NEXT:    st1d { z18.d }, p0, [x8, #2, mul vl]
+; CHECK-NEXT:    st1d { z19.d }, p0, [x8, #3, mul vl]
+; CHECK-NEXT:    str x8, [sp]
 ; CHECK-NEXT:    bl callee2
 ; CHECK-NEXT:    addvl sp, sp, #4
 ; CHECK-NEXT:    add sp, sp, #16 // =16