[llvm] f13beac - [AArch64][SVE] Preserve full vector regs over EH edge.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 2 02:54:59 PDT 2020


Author: Sander de Smalen
Date: 2020-09-02T10:54:18+01:00
New Revision: f13beac51be02cae21bce465206a920ecdca7566

URL: https://github.com/llvm/llvm-project/commit/f13beac51be02cae21bce465206a920ecdca7566
DIFF: https://github.com/llvm/llvm-project/commit/f13beac51be02cae21bce465206a920ecdca7566.diff

LOG: [AArch64][SVE] Preserve full vector regs over EH edge.

Unwinders may only preserve the lower 64bits of Neon and SVE registers,
as only the registers in the base ABI are guaranteed to be preserved
over the exception edge. The caller will need to preserve additional
registers for when the call throws an exception and the unwinder has
tried to recover state.

For  e.g.

    svint32_t bar(svint32_t);
    svint32_t foo(svint32_t x, bool *err) {
      try { bar(x); } catch (...) { *err = true; }
      return x;
    }

`z0` needs to be spilled before the call to `bar(x)` and reloaded before
returning from foo, as the exception handler may have clobbered z0.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D84737

Added: 
    llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
    llvm/test/CodeGen/AArch64/unwind-preserved.ll

Modified: 
    llvm/include/llvm/CodeGen/TargetRegisterInfo.h
    llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/lib/CodeGen/LiveIntervals.cpp
    llvm/lib/CodeGen/MIRParser/MIRParser.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
    llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
    llvm/lib/Target/AArch64/AArch64RegisterInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index d921c4c9028b..e4e92581b893 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -449,6 +449,13 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return nullptr;
   }
 
+  /// Return a register mask for the registers preserved by the unwinder,
+  /// or nullptr if no custom mask is needed.
+  virtual const uint32_t *
+  getCustomEHPadPreservedMask(const MachineFunction &MF) const {
+    return nullptr;
+  }
+
   /// Return a register mask that clobbers everything.
   virtual const uint32_t *getNoPreservedMask() const {
     llvm_unreachable("target does not provide no preserved mask");

diff  --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index c14f0ef882a8..cce0ca938c9f 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2108,6 +2108,12 @@ bool IRTranslator::translateLandingPad(const User &U,
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
     .addSym(MF->addLandingPad(&MBB));
 
+  // If the unwinder does not preserve all registers, ensure that the
+  // function marks the clobbered registers as used.
+  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+  if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+    MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
   LLT Ty = getLLTForType(*LP.getType(), *DL);
   Register Undef = MRI->createGenericVirtualRegister(Ty);
   MIRBuilder.buildUndef(Undef);

diff  --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 6539de107279..b60fea6fb4e3 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -225,6 +225,15 @@ void LiveIntervals::computeRegMasks() {
       RegMaskBits.push_back(Mask);
     }
 
+    // Unwinders may clobber additional registers.
+    // FIXME: This functionality can possibly be merged into
+    // MachineBasicBlock::getBeginClobberMask().
+    if (MBB.isEHPad())
+      if (auto *Mask = TRI->getCustomEHPadPreservedMask(*MBB.getParent())) {
+        RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB));
+        RegMaskBits.push_back(Mask);
+      }
+
     for (const MachineInstr &MI : MBB) {
       for (const MachineOperand &MO : MI.operands()) {
         if (!MO.isRegMask())

diff  --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index badf5868c757..945a560de3ca 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -639,6 +639,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
 
   // Compute MachineRegisterInfo::UsedPhysRegMask
   for (const MachineBasicBlock &MBB : MF) {
+    // Make sure MRI knows about registers clobbered by unwinder.
+    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+    if (MBB.isEHPad())
+      if (auto *RegMask = TRI->getCustomEHPadPreservedMask(MF))
+        MRI.addPhysRegsUsedFromRegMask(RegMask);
+
     for (const MachineInstr &MI : MBB) {
       for (const MachineOperand &MO : MI.operands()) {
         if (!MO.isRegMask())

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1f0432196a2d..8650cfceb86c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1251,6 +1251,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
   BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
     .addSym(Label);
 
+  // If the unwinder does not preserve all registers, ensure that the
+  // function marks the clobbered registers as used.
+  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+  if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+    MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
   if (Pers == EHPersonality::Wasm_CXX) {
     if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI()))
       mapWasmLandingPadIndex(MBB, CPI);

diff  --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index f719cc6e5e4e..2f1317d8f1ea 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -240,6 +240,14 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
     return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
 }
 
+const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask(
+    const MachineFunction &MF) const {
+  if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux())
+    return CSR_AArch64_AAPCS_RegMask;
+
+  return nullptr;
+}
+
 const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
   if (TT.isOSDarwin())
     return CSR_Darwin_AArch64_TLS_RegMask;

diff  --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 7b20f181e76d..e3c8a77f433f 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -72,6 +72,10 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
   // Funclets on ARM64 Windows don't preserve any registers.
   const uint32_t *getNoPreservedMask() const override;
 
+  // Unwinders may not preserve all Neon and SVE registers.
+  const uint32_t *
+  getCustomEHPadPreservedMask(const MachineFunction &MF) const override;
+
   /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
   /// case that 'returned' is on an i64 first argument if the calling convention
   /// is one that can (partially) model this attribute with a preserved mask

diff  --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
new file mode 100644
index 000000000000..aacc3c6542c7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
@@ -0,0 +1,143 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=regallocfast,prologepilog -simplify-mir -o - %s | FileCheck %s
+
+# This test checks that the MIRParser correctly clobbers the registers
+# that are not preserved by the unwinder (when the MIR contains an EH pad).
+--- |
+  ; ModuleID = '<stdin>'
+  source_filename = "<stdin>"
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-unknown-linux-gnu"
+
+  define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) personality i8 0 {
+    %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v)
+            to label %.Lcontinue unwind label %.Lunwind
+
+  .Lcontinue:                                       ; preds = %0
+    ret <4 x i32> %result
+
+  .Lunwind:                                         ; preds = %0
+    %lp = landingpad { i8*, i32 }
+            cleanup
+    ret <4 x i32> %v
+  }
+
+  declare aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32>)
+
+...
+---
+name:            invoke_callee_may_throw_neon
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr128 }
+  - { id: 1, class: fpr128 }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+liveins:
+  - { reg: '$q0' }
+frameInfo:
+  maxAlignment:    1
+  adjustsStack:    true
+  hasCalls:        true
+  maxCallFrameSize: 0
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: invoke_callee_may_throw_neon
+  ; CHECK: bb.0 (%ir-block.0):
+  ; CHECK:   successors: %bb.1, %bb.2
+  ; CHECK:   liveins: $q0, $q22, $q23, $q20, $q21, $q18, $q19, $q16, $q17, $q14, $q15, $q12, $q13, $q10, $q11, $q8, $q9, $lr, $fp
+  ; CHECK:   $sp = frame-setup SUBXri $sp, 304, 0
+  ; CHECK:   frame-setup STPQi killed $q23, killed $q22, $sp, 2 :: (store 16 into %stack.19), (store 16 into %stack.18)
+  ; CHECK:   frame-setup STPQi killed $q21, killed $q20, $sp, 4 :: (store 16 into %stack.17), (store 16 into %stack.16)
+  ; CHECK:   frame-setup STPQi killed $q19, killed $q18, $sp, 6 :: (store 16 into %stack.15), (store 16 into %stack.14)
+  ; CHECK:   frame-setup STPQi killed $q17, killed $q16, $sp, 8 :: (store 16 into %stack.13), (store 16 into %stack.12)
+  ; CHECK:   frame-setup STPQi killed $q15, killed $q14, $sp, 10 :: (store 16 into %stack.11), (store 16 into %stack.10)
+  ; CHECK:   frame-setup STPQi killed $q13, killed $q12, $sp, 12 :: (store 16 into %stack.9), (store 16 into %stack.8)
+  ; CHECK:   frame-setup STPQi killed $q11, killed $q10, $sp, 14 :: (store 16 into %stack.7), (store 16 into %stack.6)
+  ; CHECK:   frame-setup STPQi killed $q9, killed $q8, $sp, 16 :: (store 16 into %stack.5), (store 16 into %stack.4)
+  ; CHECK:   frame-setup STPXi killed $fp, killed $lr, $sp, 36 :: (store 8 into %stack.3), (store 8 into %stack.2)
+  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 304
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $w30, -8
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $w29, -16
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b8, -32
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b9, -48
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b10, -64
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b11, -80
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b12, -96
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b13, -112
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b14, -128
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b15, -144
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b16, -160
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b17, -176
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b18, -192
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b19, -208
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b20, -224
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b21, -240
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b22, -256
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $b23, -272
+  ; CHECK:   EH_LABEL <mcsymbol .Ltmp0>
+  ; CHECK:   STRQui $q0, $sp, 1 :: (store 16 into %stack.0)
+  ; CHECK:   BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit killed $q0, implicit-def $q0
+  ; CHECK:   EH_LABEL <mcsymbol .Ltmp1>
+  ; CHECK:   STRQui killed $q0, $sp, 0 :: (store 16 into %stack.1)
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1..Lcontinue:
+  ; CHECK:   $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1)
+  ; CHECK:   $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2)
+  ; CHECK:   $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4)
+  ; CHECK:   $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6)
+  ; CHECK:   $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load 16 from %stack.9), (load 16 from %stack.8)
+  ; CHECK:   $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load 16 from %stack.11), (load 16 from %stack.10)
+  ; CHECK:   $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load 16 from %stack.13), (load 16 from %stack.12)
+  ; CHECK:   $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load 16 from %stack.15), (load 16 from %stack.14)
+  ; CHECK:   $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load 16 from %stack.17), (load 16 from %stack.16)
+  ; CHECK:   $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load 16 from %stack.19), (load 16 from %stack.18)
+  ; CHECK:   $sp = frame-destroy ADDXri $sp, 304, 0
+  ; CHECK:   RET_ReallyLR implicit killed $q0
+  ; CHECK: bb.2..Lunwind (landing-pad):
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   EH_LABEL <mcsymbol .Ltmp2>
+  ; CHECK:   $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0)
+  ; CHECK:   $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2)
+  ; CHECK:   $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4)
+  ; CHECK:   $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6)
+  ; CHECK:   $q13, $q12 = frame-destroy LDPQi $sp, 12 :: (load 16 from %stack.9), (load 16 from %stack.8)
+  ; CHECK:   $q15, $q14 = frame-destroy LDPQi $sp, 10 :: (load 16 from %stack.11), (load 16 from %stack.10)
+  ; CHECK:   $q17, $q16 = frame-destroy LDPQi $sp, 8 :: (load 16 from %stack.13), (load 16 from %stack.12)
+  ; CHECK:   $q19, $q18 = frame-destroy LDPQi $sp, 6 :: (load 16 from %stack.15), (load 16 from %stack.14)
+  ; CHECK:   $q21, $q20 = frame-destroy LDPQi $sp, 4 :: (load 16 from %stack.17), (load 16 from %stack.16)
+  ; CHECK:   $q23, $q22 = frame-destroy LDPQi $sp, 2 :: (load 16 from %stack.19), (load 16 from %stack.18)
+  ; CHECK:   $sp = frame-destroy ADDXri $sp, 304, 0
+  ; CHECK:   RET_ReallyLR implicit killed $q0
+  bb.0 (%ir-block.0):
+    successors: %bb.1, %bb.2
+    liveins: $q0
+
+    %0:fpr128 = COPY $q0
+    EH_LABEL <mcsymbol .Ltmp0>
+    ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    $q0 = COPY %0
+    BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit $q0, implicit-def $q0
+    %1:fpr128 = COPY $q0
+    ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    EH_LABEL <mcsymbol .Ltmp1>
+    B %bb.1
+
+  bb.1..Lcontinue:
+    $q0 = COPY %1
+    RET_ReallyLR implicit $q0
+
+  bb.2..Lunwind (landing-pad):
+    liveins: $x0, $x1
+
+    EH_LABEL <mcsymbol .Ltmp2>
+    $q0 = COPY %0
+    RET_ReallyLR implicit $q0
+
+...

diff  --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
new file mode 100644
index 000000000000..cf2a8e9b4a36
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=0 -global-isel-abort=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -O0 -global-isel=1 -global-isel-abort=0 < %s | FileCheck %s
+
+; Test that z0 is saved/restored, as the unwinder may only retain the low 64bits (d0).
+define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) personality i8 0 {
+; CHECK-LABEL: invoke_callee_may_throw_sve:
+; CHECK:       .Lfunc_begin0:
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-18
+; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 16 - 16 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 16 - 24 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4b, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x60, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d11 @ cfa - 16 - 32 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4c, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x58, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d12 @ cfa - 16 - 40 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG
+; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    str z0, [sp, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    bl may_throw_sve
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_1: // %.Lcontinue
+; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #18
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %.Lunwind
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #18
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %result = invoke <vscale x 4 x i32> @may_throw_sve(<vscale x 4 x i32> %v) to label %.Lcontinue unwind label %.Lunwind
+.Lcontinue:
+  ret <vscale x 4 x i32> %result
+.Lunwind:
+  %lp = landingpad { i8*, i32 } cleanup
+  ret <vscale x 4 x i32> %v;
+}
+
+declare <vscale x 4 x i32> @may_throw_sve(<vscale x 4 x i32> %v);
+
+
+; Test that q0 is saved/restored, as the unwinder may only retain the low 64bits (d0).
+define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v) personality i8 0 {
+; CHECK-LABEL: invoke_callee_may_throw_neon:
+; CHECK:       .Lfunc_begin1:
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #304 // =304
+; CHECK-NEXT:    stp q23, q22, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q21, q20, [sp, #64] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q19, q18, [sp, #96] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q17, q16, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q15, q14, [sp, #160] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q13, q12, [sp, #192] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q11, q10, [sp, #224] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q9, q8, [sp, #256] // 32-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 304
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -64
+; CHECK-NEXT:    .cfi_offset b11, -80
+; CHECK-NEXT:    .cfi_offset b12, -96
+; CHECK-NEXT:    .cfi_offset b13, -112
+; CHECK-NEXT:    .cfi_offset b14, -128
+; CHECK-NEXT:    .cfi_offset b15, -144
+; CHECK-NEXT:    .cfi_offset b16, -160
+; CHECK-NEXT:    .cfi_offset b17, -176
+; CHECK-NEXT:    .cfi_offset b18, -192
+; CHECK-NEXT:    .cfi_offset b19, -208
+; CHECK-NEXT:    .cfi_offset b20, -224
+; CHECK-NEXT:    .cfi_offset b21, -240
+; CHECK-NEXT:    .cfi_offset b22, -256
+; CHECK-NEXT:    .cfi_offset b23, -272
+; CHECK-NEXT:  .Ltmp3:
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    bl may_throw_neon
+; CHECK-NEXT:  .Ltmp4:
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_1: // %.Lcontinue
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q13, q12, [sp, #192] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q15, q14, [sp, #160] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q17, q16, [sp, #128] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q19, q18, [sp, #96] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q21, q20, [sp, #64] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q23, q22, [sp, #32] // 32-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #304 // =304
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: // %.Lunwind
+; CHECK-NEXT:  .Ltmp5:
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q13, q12, [sp, #192] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q15, q14, [sp, #160] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q17, q16, [sp, #128] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q19, q18, [sp, #96] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q21, q20, [sp, #64] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q23, q22, [sp, #32] // 32-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #304 // =304
+; CHECK-NEXT:    ret
+  %result = invoke aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v) to label %.Lcontinue unwind label %.Lunwind
+.Lcontinue:
+  ret <4 x i32> %result
+.Lunwind:
+  %lp = landingpad { i8*, i32 } cleanup
+  ret <4 x i32> %v;
+}
+
+declare aarch64_vector_pcs <4 x i32> @may_throw_neon(<4 x i32> %v);


        


More information about the llvm-commits mailing list