[llvm] 9c85382 - [RISCV] Handle register spill in branch relaxation

ZHU Zijia via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 23 22:31:20 PDT 2022


Author: ZHU Zijia
Date: 2022-08-24T13:27:56+08:00
New Revision: 9c85382aded804ddf55aa0e5ed60a5cb9fb275fe

URL: https://github.com/llvm/llvm-project/commit/9c85382aded804ddf55aa0e5ed60a5cb9fb275fe
DIFF: https://github.com/llvm/llvm-project/commit/9c85382aded804ddf55aa0e5ed60a5cb9fb275fe.diff

LOG: [RISCV] Handle register spill in branch relaxation

In the branch relaxation pass, `j` instructions with offsets over 1MiB
will be relaxed to `jump` pseudo-instructions.

This patch allocates a stack slot for functions whose estimated size is
greater than 1MiB. If the register scavenger cannot find a scratch
register for the `jump`, a register is spilled to the slot before the
jump and restored after it.

.mbb:
        foo
        j       .dest_bb
        bar
        bar
        bar
.dest_bb:
        baz

The above code will be relaxed to the following code.

.mbb:
        foo
        sd      s11, 0(sp)
        jump    .restore_bb, s11
        bar
        bar
        bar
        j       .dest_bb
.restore_bb:
        ld      s11, 0(sp)
.dest_bb:
        baz
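
For reference, `jal`/`j` encode a 21-bit signed, 2-byte-aligned offset,
so their reach is roughly +/-1MiB; that is the limit being worked
around. The per-branch worst case assumed by the new size estimate in
RISCVFrameLowering.cpp is:

        sd      s11, 0(sp)        # 4 bytes (2 with RVC)
        jump    .restore_bb, s11  # 8 bytes
        j       .dest_bb          # 4 bytes (2 with RVC)
        ld      s11, 0(sp)        # 4 bytes (2 with RVC)

i.e. 20 extra bytes per relaxed branch without the C extension and 14
with it, on top of the (possibly reversed) conditional branch itself.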

Depends on D129999.

Reviewed By: StephenFan

Differential Revision: https://reviews.llvm.org/D130560

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
    llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
    llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
    llvm/test/CodeGen/RISCV/branch-relaxation.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index d83ba01db853..439189befaea 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1015,10 +1015,49 @@ static bool hasRVVFrameObject(const MachineFunction &MF) {
   return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
 }
 
+static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
+                                            const RISCVInstrInfo &TII) {
+  unsigned FnSize = 0;
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      // Far branches beyond the 20-bit offset range will be relaxed in the
+      // branch relaxation pass. In the worst case, conditional branches will
+      // be relaxed into the following instruction sequence. Unconditional
+      // branches are relaxed in the same way, except that there is no
+      // leading branch instruction.
+      //
+      //        foo
+      //        bne     t5, t6, .rev_cond # `TII->getInstSizeInBytes(MI)` bytes
+      //        sd      s11, 0(sp)        # 4 bytes, or 2 bytes in RVC
+      //        jump    .restore, s11     # 8 bytes
+      // .rev_cond:
+      //        bar
+      //        j       .dest_bb          # 4 bytes, or 2 bytes in RVC
+      // .restore:
+      //        ld      s11, 0(sp)        # 4 bytes, or 2 bytes in RVC
+      // .dest_bb:
+      //        baz
+      if (MI.isConditionalBranch())
+        FnSize += TII.getInstSizeInBytes(MI);
+      if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
+        if (MF.getSubtarget<RISCVSubtarget>().hasStdExtC())
+          FnSize += 2 + 8 + 2 + 2;
+        else
+          FnSize += 4 + 8 + 4 + 4;
+        continue;
+      }
+
+      FnSize += TII.getInstSizeInBytes(MI);
+    }
+  }
+  return FnSize;
+}
+
 void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
     MachineFunction &MF, RegScavenger *RS) const {
   const RISCVRegisterInfo *RegInfo =
       MF.getSubtarget<RISCVSubtarget>().getRegisterInfo();
+  const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterClass *RC = &RISCV::GPRRegClass;
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
@@ -1037,23 +1076,31 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
     MFI.ensureMaxAlignment(RVVStackAlign);
   }
 
+  unsigned ScavSlotsNum = 0;
+
   // estimateStackSize has been observed to under-estimate the final stack
   // size, so give ourselves wiggle-room by checking for stack size
   // representable in an 11-bit signed field rather than 12 bits.
-  // FIXME: It may be possible to craft a function with a small stack that
-  // still needs an emergency spill slot for branch relaxation. This case
-  // would currently be missed.
-  // RVV loads & stores have no capacity to hold the immediate address offsets
-  // so we must always reserve an emergency spill slot if the MachineFunction
-  // contains any RVV spills.
-  unsigned ScavSlotsNum = 0;
   if (!isInt<11>(MFI.estimateStackSize(MF)))
     ScavSlotsNum = 1;
 
+  // Far branches over a 20-bit offset require a spill slot for the scratch register.
+  bool IsLargeFunction = !isInt<20>(estimateFunctionSizeInBytes(MF, *TII));
+  if (IsLargeFunction)
+    ScavSlotsNum = std::max(ScavSlotsNum, 1u);
+
+  // RVV loads & stores have no capacity to hold the immediate address offsets
+  // so we must always reserve an emergency spill slot if the MachineFunction
+  // contains any RVV spills.
   ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));
-  for (unsigned i = 0; i < ScavSlotsNum; i++) {
-    RS->addScavengingFrameIndex(MFI.CreateStackObject(
-        RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false));
+
+  for (unsigned I = 0; I < ScavSlotsNum; I++) {
+    int FI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
+                                   RegInfo->getSpillAlign(*RC), false);
+    RS->addScavengingFrameIndex(FI);
+
+    if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1)
+      RVFI->setBranchRelaxationScratchFrameIndex(FI);
   }
 
   if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) {

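As a sanity check on the threshold above (a note for readers, not part
of the patch): isInt<20>(x) holds for -2^19 <= x <= 2^19 - 1, so the
emergency slot is reserved once the estimated function size reaches

    2^19 bytes = 524288 bytes = 512KiB

which puts the slot in place well before branches actually run out of
the +/-1MiB reach of `jal`.
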
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 94ad2cf0b550..06d3e207008f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -958,9 +958,13 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   assert(MBB.empty() &&
          "new block should be inserted for expanding unconditional branch");
   assert(MBB.pred_size() == 1);
+  assert(RestoreBB.empty() &&
+         "restore block should be inserted for restoring clobbered registers");
 
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
+  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 
   if (!isInt<32>(BrOffset))
     report_fatal_error(
@@ -971,19 +975,43 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   // uses the same workaround).
   Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   auto II = MBB.end();
-
+  // We may also update the jump target to RestoreBB later.
   MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                           .addReg(ScratchReg, RegState::Define | RegState::Dead)
                           .addMBB(&DestBB, RISCVII::MO_CALL);
 
   RS->enterBasicBlockEnd(MBB);
-  Register Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
-                                                MI.getIterator(), false, 0);
-  // TODO: The case when there is no scavenged register needs special handling.
-  assert(Scav != RISCV::NoRegister && "No register is scavenged!");
-  MRI.replaceRegWith(ScratchReg, Scav);
+  Register TmpGPR =
+      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
+                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
+                                    /*AllowSpill=*/false);
+  if (TmpGPR != RISCV::NoRegister)
+    RS->setRegUsed(TmpGPR);
+  else {
+    // The case when there is no scavenged register needs special handling.
+
+    // Pick s11 because it doesn't make a difference.
+    TmpGPR = RISCV::X27;
+
+    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
+    if (FrameIndex == -1)
+      report_fatal_error("underestimated function size");
+
+    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
+                        &RISCV::GPRRegClass, TRI);
+    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
+                             /*SpAdj=*/0, /*FIOperandNum=*/1);
+
+    MI.getOperand(1).setMBB(&RestoreBB);
+
+    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
+                         &RISCV::GPRRegClass, TRI);
+    TRI->eliminateFrameIndex(RestoreBB.back(),
+                             /*SpAdj=*/0, /*FIOperandNum=*/1);
+  }
+
+  MRI.replaceRegWith(ScratchReg, TmpGPR);
   MRI.clearVirtRegs();
-  RS->setRegUsed(Scav);
 }
 
 bool RISCVInstrInfo::reverseBranchCondition(

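When no register can be scavenged, the hunk above emits, in assembly
terms (the frame-index offset shown as <offset> is resolved by
eliminateFrameIndex; s11 is X27):

        sd      s11, <offset>(sp)  # storeRegToStackSlot
        jump    .restore_bb, s11   # PseudoJump, retargeted to RestoreBB
        ...
.restore_bb:
        ld      s11, <offset>(sp)  # loadRegFromStackSlot
.dest_bb:
        ...

matching the example in the commit message.
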
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 622767540d99..53f002e03e63 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -53,6 +53,8 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
   /// FrameIndex used for transferring values between 64-bit FPRs and a pair
   /// of 32-bit GPRs via the stack.
   int MoveF64FrameIndex = -1;
+  /// FrameIndex of the spill slot for the scratch register in BranchRelaxation.
+  int BranchRelaxationScratchFrameIndex = -1;
   /// Size of any opaque stack adjustment due to save/restore libcalls.
   unsigned LibCallStackSize = 0;
   /// Size of RVV stack.
@@ -85,6 +87,13 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
     return MoveF64FrameIndex;
   }
 
+  int getBranchRelaxationScratchFrameIndex() const {
+    return BranchRelaxationScratchFrameIndex;
+  }
+  void setBranchRelaxationScratchFrameIndex(int Index) {
+    BranchRelaxationScratchFrameIndex = Index;
+  }
+
   unsigned getLibCallStackSize() const { return LibCallStackSize; }
   void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; }
 

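To recap how the new field flows between the two passes (condensed from
the .cpp hunks above, not new code):

    // RISCVFrameLowering::processFunctionBeforeFrameFinalized records
    // the first emergency slot for branch relaxation.
    if (IsLargeFunction && RVFI->getBranchRelaxationScratchFrameIndex() == -1)
      RVFI->setBranchRelaxationScratchFrameIndex(FI);

    // RISCVInstrInfo::insertIndirectBranch consumes it; a missing slot
    // is a hard error, since it means the size estimate was too low.
    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error("underestimated function size");
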
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
index c0166c217bad..8842b4191cc9 100644
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -3,9 +3,18 @@
 ; RUN:   -o /dev/null 2>&1
 ; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs \
 ; RUN:   -filetype=obj < %s -o /dev/null 2>&1
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
 ; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -filetype=obj < %s \
+; RUN:   -o /dev/null 2>&1
+; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs \
+; RUN:   -filetype=obj < %s -o /dev/null 2>&1
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
 
 define void @relax_bcc(i1 %a) nounwind {
 ; CHECK-LABEL: relax_bcc:
@@ -32,9 +41,10 @@ tail:
 define i32 @relax_jal(i1 %a) nounwind {
 ; CHECK-LABEL: relax_jal:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    andi a0, a0, 1
 ; CHECK-NEXT:    bnez a0, .LBB1_1
-; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    jump .LBB1_2, a0
 ; CHECK-NEXT:  .LBB1_1: # %iftrue
 ; CHECK-NEXT:    #APP
@@ -42,12 +52,13 @@ define i32 @relax_jal(i1 %a) nounwind {
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    .zero 1048576
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    ret
+; CHECK-NEXT:    j .LBB1_3
 ; CHECK-NEXT:  .LBB1_2: # %jmp
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:  .LBB1_3: # %tail
 ; CHECK-NEXT:    li a0, 1
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   br i1 %a, label %iftrue, label %jmp
 
@@ -66,3 +77,1221 @@ space:
 tail:
   ret i32 1
 }
+
+define void @relax_jal_spill_32() {
+; CHECK-RV32-LABEL: relax_jal_spill_32:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -64
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-RV32-NEXT:    .cfi_offset s5, -28
+; CHECK-RV32-NEXT:    .cfi_offset s6, -32
+; CHECK-RV32-NEXT:    .cfi_offset s7, -36
+; CHECK-RV32-NEXT:    .cfi_offset s8, -40
+; CHECK-RV32-NEXT:    .cfi_offset s9, -44
+; CHECK-RV32-NEXT:    .cfi_offset s10, -48
+; CHECK-RV32-NEXT:    .cfi_offset s11, -52
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li ra, 1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t0, 5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t1, 6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t2, 7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s0, 8
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s1, 9
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a0, 10
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a1, 11
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a2, 12
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a3, 13
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a4, 14
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a5, 15
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a6, 16
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a7, 17
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s2, 18
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s3, 19
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s4, 20
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s5, 21
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s6, 22
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s7, 23
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s8, 24
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s9, 25
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s10, 26
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s11, 27
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t3, 28
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t4, 29
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t5, 30
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t6, 31
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    beq t5, t6, .LBB2_1
+; CHECK-RV32-NEXT:  # %bb.3:
+; CHECK-RV32-NEXT:    sw s11, 0(sp)
+; CHECK-RV32-NEXT:    jump .LBB2_4, s11
+; CHECK-RV32-NEXT:  .LBB2_1: # %branch_1
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    .zero 1048576
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    j .LBB2_2
+; CHECK-RV32-NEXT:  .LBB2_4: # %branch_2
+; CHECK-RV32-NEXT:    lw s11, 0(sp)
+; CHECK-RV32-NEXT:  .LBB2_2: # %branch_2
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use ra
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s8
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s9
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s10
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s11
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 64
+; CHECK-RV32-NEXT:    ret
+
+  %ra = call i32 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i32 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i32 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i32 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i32 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i32 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i32 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i32 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i32 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i32 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i32 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i32 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i32 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i32 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i32 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i32 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i32 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i32 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i32 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i32 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i32 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i32 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i32 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i32 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i32 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i32 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i32 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i32 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  %cmp = icmp eq i32 %t5, %t6
+  br i1 %cmp, label %branch_1, label %branch_2
+
+branch_1:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %branch_2
+
+branch_2:
+  call void asm sideeffect "# reg use $0", "{ra}"(i32 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i32 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i32 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i32 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i32 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i32 %s1)
+  call void asm sideeffect "# reg use $0", "{a0}"(i32 %a0)
+  call void asm sideeffect "# reg use $0", "{a1}"(i32 %a1)
+  call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4)
+  call void asm sideeffect "# reg use $0", "{a5}"(i32 %a5)
+  call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6)
+  call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7)
+  call void asm sideeffect "# reg use $0", "{s2}"(i32 %s2)
+  call void asm sideeffect "# reg use $0", "{s3}"(i32 %s3)
+  call void asm sideeffect "# reg use $0", "{s4}"(i32 %s4)
+  call void asm sideeffect "# reg use $0", "{s5}"(i32 %s5)
+  call void asm sideeffect "# reg use $0", "{s6}"(i32 %s6)
+  call void asm sideeffect "# reg use $0", "{s7}"(i32 %s7)
+  call void asm sideeffect "# reg use $0", "{s8}"(i32 %s8)
+  call void asm sideeffect "# reg use $0", "{s9}"(i32 %s9)
+  call void asm sideeffect "# reg use $0", "{s10}"(i32 %s10)
+  call void asm sideeffect "# reg use $0", "{s11}"(i32 %s11)
+  call void asm sideeffect "# reg use $0", "{t3}"(i32 %t3)
+  call void asm sideeffect "# reg use $0", "{t4}"(i32 %t4)
+  call void asm sideeffect "# reg use $0", "{t5}"(i32 %t5)
+  call void asm sideeffect "# reg use $0", "{t6}"(i32 %t6)
+
+  ret void
+}
+
+define void @relax_jal_spill_32_adjust_spill_slot() {
+; CHECK-RV32-LABEL: relax_jal_spill_32_adjust_spill_slot:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    addi sp, sp, -2032
+; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 2032
+; CHECK-RV32-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s1, 2020(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s2, 2016(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s3, 2012(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s4, 2008(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s5, 2004(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s6, 2000(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s7, 1996(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s8, 1992(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s9, 1988(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s10, 1984(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    sw s11, 1980(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-RV32-NEXT:    .cfi_offset s5, -28
+; CHECK-RV32-NEXT:    .cfi_offset s6, -32
+; CHECK-RV32-NEXT:    .cfi_offset s7, -36
+; CHECK-RV32-NEXT:    .cfi_offset s8, -40
+; CHECK-RV32-NEXT:    .cfi_offset s9, -44
+; CHECK-RV32-NEXT:    .cfi_offset s10, -48
+; CHECK-RV32-NEXT:    .cfi_offset s11, -52
+; CHECK-RV32-NEXT:    addi s0, sp, 2032
+; CHECK-RV32-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-RV32-NEXT:    lui a0, 2
+; CHECK-RV32-NEXT:    addi a0, a0, -2032
+; CHECK-RV32-NEXT:    sub sp, sp, a0
+; CHECK-RV32-NEXT:    srli a0, sp, 12
+; CHECK-RV32-NEXT:    slli sp, a0, 12
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li ra, 1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t0, 5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t1, 6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t2, 7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s0, 8
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s1, 9
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a0, 10
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a1, 11
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a2, 12
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a3, 13
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a4, 14
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a5, 15
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a6, 16
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li a7, 17
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s2, 18
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s3, 19
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s4, 20
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s5, 21
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s6, 22
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s7, 23
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s8, 24
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s9, 25
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s10, 26
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li s11, 27
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t3, 28
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t4, 29
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t5, 30
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    li t6, 31
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    beq t5, t6, .LBB3_1
+; CHECK-RV32-NEXT:  # %bb.3:
+; CHECK-RV32-NEXT:    sw s11, 0(sp)
+; CHECK-RV32-NEXT:    jump .LBB3_4, s11
+; CHECK-RV32-NEXT:  .LBB3_1: # %branch_1
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    .zero 1048576
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    j .LBB3_2
+; CHECK-RV32-NEXT:  .LBB3_4: # %branch_2
+; CHECK-RV32-NEXT:    lw s11, 0(sp)
+; CHECK-RV32-NEXT:  .LBB3_2: # %branch_2
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use ra
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a0
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a1
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use a7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s2
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s7
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s8
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s9
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s10
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use s11
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t3
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t4
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t5
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    #APP
+; CHECK-RV32-NEXT:    # reg use t6
+; CHECK-RV32-NEXT:    #NO_APP
+; CHECK-RV32-NEXT:    lui a0, 2
+; CHECK-RV32-NEXT:    sub sp, s0, a0
+; CHECK-RV32-NEXT:    lui a0, 2
+; CHECK-RV32-NEXT:    addi a0, a0, -2032
+; CHECK-RV32-NEXT:    add sp, sp, a0
+; CHECK-RV32-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s2, 2016(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s3, 2012(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s4, 2008(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s5, 2004(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s6, 2000(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s7, 1996(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s8, 1992(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s9, 1988(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s10, 1984(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s11, 1980(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    addi sp, sp, 2032
+; CHECK-RV32-NEXT:    ret
+
+  ; If the stack is large and the offset of BranchRelaxationScratchFrameIndex
+  ; is out of the range of a 12-bit signed integer, check that the spill slot
+  ; is adjusted to be close to the stack base register.
+  %stack_obj = alloca i32, align 4096
+
+  %ra = call i32 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i32 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i32 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i32 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i32 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i32 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i32 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i32 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i32 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i32 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i32 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i32 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i32 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i32 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i32 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i32 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i32 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i32 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i32 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i32 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i32 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i32 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i32 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i32 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i32 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i32 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i32 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i32 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  %cmp = icmp eq i32 %t5, %t6
+  br i1 %cmp, label %branch_1, label %branch_2
+
+branch_1:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %branch_2
+
+branch_2:
+  call void asm sideeffect "# reg use $0", "{ra}"(i32 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i32 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i32 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i32 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i32 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i32 %s1)
+  call void asm sideeffect "# reg use $0", "{a0}"(i32 %a0)
+  call void asm sideeffect "# reg use $0", "{a1}"(i32 %a1)
+  call void asm sideeffect "# reg use $0", "{a2}"(i32 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i32 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i32 %a4)
+  call void asm sideeffect "# reg use $0", "{a5}"(i32 %a5)
+  call void asm sideeffect "# reg use $0", "{a6}"(i32 %a6)
+  call void asm sideeffect "# reg use $0", "{a7}"(i32 %a7)
+  call void asm sideeffect "# reg use $0", "{s2}"(i32 %s2)
+  call void asm sideeffect "# reg use $0", "{s3}"(i32 %s3)
+  call void asm sideeffect "# reg use $0", "{s4}"(i32 %s4)
+  call void asm sideeffect "# reg use $0", "{s5}"(i32 %s5)
+  call void asm sideeffect "# reg use $0", "{s6}"(i32 %s6)
+  call void asm sideeffect "# reg use $0", "{s7}"(i32 %s7)
+  call void asm sideeffect "# reg use $0", "{s8}"(i32 %s8)
+  call void asm sideeffect "# reg use $0", "{s9}"(i32 %s9)
+  call void asm sideeffect "# reg use $0", "{s10}"(i32 %s10)
+  call void asm sideeffect "# reg use $0", "{s11}"(i32 %s11)
+  call void asm sideeffect "# reg use $0", "{t3}"(i32 %t3)
+  call void asm sideeffect "# reg use $0", "{t4}"(i32 %t4)
+  call void asm sideeffect "# reg use $0", "{t5}"(i32 %t5)
+  call void asm sideeffect "# reg use $0", "{t6}"(i32 %t6)
+
+  ret void
+}
+
+define void @relax_jal_spill_64() {
+; CHECK-RV64-LABEL: relax_jal_spill_64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    addi sp, sp, -112
+; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-RV64-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-RV64-NEXT:    .cfi_offset s5, -56
+; CHECK-RV64-NEXT:    .cfi_offset s6, -64
+; CHECK-RV64-NEXT:    .cfi_offset s7, -72
+; CHECK-RV64-NEXT:    .cfi_offset s8, -80
+; CHECK-RV64-NEXT:    .cfi_offset s9, -88
+; CHECK-RV64-NEXT:    .cfi_offset s10, -96
+; CHECK-RV64-NEXT:    .cfi_offset s11, -104
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li ra, 1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t0, 5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t1, 6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t2, 7
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s0, 8
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s1, 9
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a0, 10
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a1, 11
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a2, 12
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a3, 13
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a4, 14
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a5, 15
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a6, 16
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a7, 17
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s2, 18
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s3, 19
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s4, 20
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s5, 21
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s6, 22
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s7, 23
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s8, 24
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s9, 25
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s10, 26
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s11, 27
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t3, 28
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t4, 29
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t5, 30
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t6, 31
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    beq t5, t6, .LBB4_1
+; CHECK-RV64-NEXT:  # %bb.3:
+; CHECK-RV64-NEXT:    sd s11, 0(sp)
+; CHECK-RV64-NEXT:    jump .LBB4_4, s11
+; CHECK-RV64-NEXT:  .LBB4_1: # %branch_1
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    .zero 1048576
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    j .LBB4_2
+; CHECK-RV64-NEXT:  .LBB4_4: # %branch_2
+; CHECK-RV64-NEXT:    ld s11, 0(sp)
+; CHECK-RV64-NEXT:  .LBB4_2: # %branch_2
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use ra
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t0
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t2
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s0
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a0
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a2
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a3
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a4
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a7
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s2
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s3
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s4
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s7
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s8
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s9
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s10
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s11
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t3
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t4
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 112
+; CHECK-RV64-NEXT:    ret
+
+  %ra = call i64 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i64 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i64 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i64 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i64 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i64 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i64 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i64 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i64 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i64 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i64 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i64 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i64 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i64 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i64 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i64 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i64 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i64 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i64 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i64 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i64 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i64 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i64 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i64 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i64 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i64 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i64 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i64 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  %cmp = icmp eq i64 %t5, %t6
+  br i1 %cmp, label %branch_1, label %branch_2
+
+branch_1:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %branch_2
+
+branch_2:
+  call void asm sideeffect "# reg use $0", "{ra}"(i64 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i64 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i64 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i64 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i64 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i64 %s1)
+  call void asm sideeffect "# reg use $0", "{a0}"(i64 %a0)
+  call void asm sideeffect "# reg use $0", "{a1}"(i64 %a1)
+  call void asm sideeffect "# reg use $0", "{a2}"(i64 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i64 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i64 %a4)
+  call void asm sideeffect "# reg use $0", "{a5}"(i64 %a5)
+  call void asm sideeffect "# reg use $0", "{a6}"(i64 %a6)
+  call void asm sideeffect "# reg use $0", "{a7}"(i64 %a7)
+  call void asm sideeffect "# reg use $0", "{s2}"(i64 %s2)
+  call void asm sideeffect "# reg use $0", "{s3}"(i64 %s3)
+  call void asm sideeffect "# reg use $0", "{s4}"(i64 %s4)
+  call void asm sideeffect "# reg use $0", "{s5}"(i64 %s5)
+  call void asm sideeffect "# reg use $0", "{s6}"(i64 %s6)
+  call void asm sideeffect "# reg use $0", "{s7}"(i64 %s7)
+  call void asm sideeffect "# reg use $0", "{s8}"(i64 %s8)
+  call void asm sideeffect "# reg use $0", "{s9}"(i64 %s9)
+  call void asm sideeffect "# reg use $0", "{s10}"(i64 %s10)
+  call void asm sideeffect "# reg use $0", "{s11}"(i64 %s11)
+  call void asm sideeffect "# reg use $0", "{t3}"(i64 %t3)
+  call void asm sideeffect "# reg use $0", "{t4}"(i64 %t4)
+  call void asm sideeffect "# reg use $0", "{t5}"(i64 %t5)
+  call void asm sideeffect "# reg use $0", "{t6}"(i64 %t6)
+
+  ret void
+}
+
+define void @relax_jal_spill_64_adjust_spill_slot() {
+; CHECK-RV64-LABEL: relax_jal_spill_64_adjust_spill_slot:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    addi sp, sp, -2032
+; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 2032
+; CHECK-RV64-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s2, 2000(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s3, 1992(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s4, 1984(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s5, 1976(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s6, 1968(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s7, 1960(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s8, 1952(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s9, 1944(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s10, 1936(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    sd s11, 1928(sp) # 8-byte Folded Spill
+; CHECK-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-RV64-NEXT:    .cfi_offset s5, -56
+; CHECK-RV64-NEXT:    .cfi_offset s6, -64
+; CHECK-RV64-NEXT:    .cfi_offset s7, -72
+; CHECK-RV64-NEXT:    .cfi_offset s8, -80
+; CHECK-RV64-NEXT:    .cfi_offset s9, -88
+; CHECK-RV64-NEXT:    .cfi_offset s10, -96
+; CHECK-RV64-NEXT:    .cfi_offset s11, -104
+; CHECK-RV64-NEXT:    addi s0, sp, 2032
+; CHECK-RV64-NEXT:    .cfi_def_cfa s0, 0
+; CHECK-RV64-NEXT:    lui a0, 2
+; CHECK-RV64-NEXT:    addiw a0, a0, -2032
+; CHECK-RV64-NEXT:    sub sp, sp, a0
+; CHECK-RV64-NEXT:    srli a0, sp, 12
+; CHECK-RV64-NEXT:    slli sp, a0, 12
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li ra, 1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t0, 5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t1, 6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t2, 7
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s0, 8
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s1, 9
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a0, 10
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a1, 11
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a2, 12
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a3, 13
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a4, 14
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a5, 15
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a6, 16
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li a7, 17
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s2, 18
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s3, 19
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s4, 20
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s5, 21
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s6, 22
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s7, 23
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s8, 24
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s9, 25
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s10, 26
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li s11, 27
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t3, 28
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t4, 29
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t5, 30
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    li t6, 31
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    beq t5, t6, .LBB5_1
+; CHECK-RV64-NEXT:  # %bb.3:
+; CHECK-RV64-NEXT:    sd s11, 0(sp)
+; CHECK-RV64-NEXT:    jump .LBB5_4, s11
+; CHECK-RV64-NEXT:  .LBB5_1: # %branch_1
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    .zero 1048576
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    j .LBB5_2
+; CHECK-RV64-NEXT:  .LBB5_4: # %branch_2
+; CHECK-RV64-NEXT:    ld s11, 0(sp)
+; CHECK-RV64-NEXT:  .LBB5_2: # %branch_2
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use ra
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t0
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t2
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s0
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a0
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a1
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a2
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a3
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a4
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use a7
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s2
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s3
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s4
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s7
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s8
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s9
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s10
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use s11
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t3
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t4
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t5
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    #APP
+; CHECK-RV64-NEXT:    # reg use t6
+; CHECK-RV64-NEXT:    #NO_APP
+; CHECK-RV64-NEXT:    lui a0, 2
+; CHECK-RV64-NEXT:    sub sp, s0, a0
+; CHECK-RV64-NEXT:    lui a0, 2
+; CHECK-RV64-NEXT:    addiw a0, a0, -2032
+; CHECK-RV64-NEXT:    add sp, sp, a0
+; CHECK-RV64-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s2, 2000(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s3, 1992(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s4, 1984(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s5, 1976(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s6, 1968(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s7, 1960(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s8, 1952(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s9, 1944(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s10, 1936(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s11, 1928(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    addi sp, sp, 2032
+; CHECK-RV64-NEXT:    ret
+
+  ; If the stack is large and the offset of BranchRelaxationScratchFrameIndex
+  ; is out of the range of a 12-bit signed integer, check that the spill slot
+  ; is adjusted to be close to the stack base register.
+  %stack_obj = alloca i64, align 4096
+
+  %ra = call i64 asm sideeffect "addi ra, x0, 1", "={ra}"()
+  %t0 = call i64 asm sideeffect "addi t0, x0, 5", "={t0}"()
+  %t1 = call i64 asm sideeffect "addi t1, x0, 6", "={t1}"()
+  %t2 = call i64 asm sideeffect "addi t2, x0, 7", "={t2}"()
+  %s0 = call i64 asm sideeffect "addi s0, x0, 8", "={s0}"()
+  %s1 = call i64 asm sideeffect "addi s1, x0, 9", "={s1}"()
+  %a0 = call i64 asm sideeffect "addi a0, x0, 10", "={a0}"()
+  %a1 = call i64 asm sideeffect "addi a1, x0, 11", "={a1}"()
+  %a2 = call i64 asm sideeffect "addi a2, x0, 12", "={a2}"()
+  %a3 = call i64 asm sideeffect "addi a3, x0, 13", "={a3}"()
+  %a4 = call i64 asm sideeffect "addi a4, x0, 14", "={a4}"()
+  %a5 = call i64 asm sideeffect "addi a5, x0, 15", "={a5}"()
+  %a6 = call i64 asm sideeffect "addi a6, x0, 16", "={a6}"()
+  %a7 = call i64 asm sideeffect "addi a7, x0, 17", "={a7}"()
+  %s2 = call i64 asm sideeffect "addi s2, x0, 18", "={s2}"()
+  %s3 = call i64 asm sideeffect "addi s3, x0, 19", "={s3}"()
+  %s4 = call i64 asm sideeffect "addi s4, x0, 20", "={s4}"()
+  %s5 = call i64 asm sideeffect "addi s5, x0, 21", "={s5}"()
+  %s6 = call i64 asm sideeffect "addi s6, x0, 22", "={s6}"()
+  %s7 = call i64 asm sideeffect "addi s7, x0, 23", "={s7}"()
+  %s8 = call i64 asm sideeffect "addi s8, x0, 24", "={s8}"()
+  %s9 = call i64 asm sideeffect "addi s9, x0, 25", "={s9}"()
+  %s10 = call i64 asm sideeffect "addi s10, x0, 26", "={s10}"()
+  %s11 = call i64 asm sideeffect "addi s11, x0, 27", "={s11}"()
+  %t3 = call i64 asm sideeffect "addi t3, x0, 28", "={t3}"()
+  %t4 = call i64 asm sideeffect "addi t4, x0, 29", "={t4}"()
+  %t5 = call i64 asm sideeffect "addi t5, x0, 30", "={t5}"()
+  %t6 = call i64 asm sideeffect "addi t6, x0, 31", "={t6}"()
+
+  %cmp = icmp eq i64 %t5, %t6
+  br i1 %cmp, label %branch_1, label %branch_2
+
+branch_1:
+  call void asm sideeffect ".space 1048576", ""()
+  br label %branch_2
+
+branch_2:
+  call void asm sideeffect "# reg use $0", "{ra}"(i64 %ra)
+  call void asm sideeffect "# reg use $0", "{t0}"(i64 %t0)
+  call void asm sideeffect "# reg use $0", "{t1}"(i64 %t1)
+  call void asm sideeffect "# reg use $0", "{t2}"(i64 %t2)
+  call void asm sideeffect "# reg use $0", "{s0}"(i64 %s0)
+  call void asm sideeffect "# reg use $0", "{s1}"(i64 %s1)
+  call void asm sideeffect "# reg use $0", "{a0}"(i64 %a0)
+  call void asm sideeffect "# reg use $0", "{a1}"(i64 %a1)
+  call void asm sideeffect "# reg use $0", "{a2}"(i64 %a2)
+  call void asm sideeffect "# reg use $0", "{a3}"(i64 %a3)
+  call void asm sideeffect "# reg use $0", "{a4}"(i64 %a4)
+  call void asm sideeffect "# reg use $0", "{a5}"(i64 %a5)
+  call void asm sideeffect "# reg use $0", "{a6}"(i64 %a6)
+  call void asm sideeffect "# reg use $0", "{a7}"(i64 %a7)
+  call void asm sideeffect "# reg use $0", "{s2}"(i64 %s2)
+  call void asm sideeffect "# reg use $0", "{s3}"(i64 %s3)
+  call void asm sideeffect "# reg use $0", "{s4}"(i64 %s4)
+  call void asm sideeffect "# reg use $0", "{s5}"(i64 %s5)
+  call void asm sideeffect "# reg use $0", "{s6}"(i64 %s6)
+  call void asm sideeffect "# reg use $0", "{s7}"(i64 %s7)
+  call void asm sideeffect "# reg use $0", "{s8}"(i64 %s8)
+  call void asm sideeffect "# reg use $0", "{s9}"(i64 %s9)
+  call void asm sideeffect "# reg use $0", "{s10}"(i64 %s10)
+  call void asm sideeffect "# reg use $0", "{s11}"(i64 %s11)
+  call void asm sideeffect "# reg use $0", "{t3}"(i64 %t3)
+  call void asm sideeffect "# reg use $0", "{t4}"(i64 %t4)
+  call void asm sideeffect "# reg use $0", "{t5}"(i64 %t5)
+  call void asm sideeffect "# reg use $0", "{t6}"(i64 %t6)
+
+  ret void
+}


        

