[llvm] 9ad38e5 - Revert "[VE] Restructure eliminateFrameIndex"

Kazushi Marukawa via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 5 03:35:36 PDT 2022


Author: Kazushi (Jam) Marukawa
Date: 2022-07-05T19:35:12+09:00
New Revision: 9ad38e5288a1087920c4344ab688c7c22600a185

URL: https://github.com/llvm/llvm-project/commit/9ad38e5288a1087920c4344ab688c7c22600a185
DIFF: https://github.com/llvm/llvm-project/commit/9ad38e5288a1087920c4344ab688c7c22600a185.diff

LOG: Revert "[VE] Restructure eliminateFrameIndex"

This reverts commit 98e52e8bff525b1fb2b269f74b27f0a984588c9c.

Added: 
    

Modified: 
    llvm/lib/Target/VE/VERegisterInfo.cpp

Removed: 
    llvm/test/CodeGen/VE/Scalar/load_stk.ll
    llvm/test/CodeGen/VE/Scalar/store_stk.ll


################################################################################
diff  --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp
index cd2c1d75a8fcc..d175ad26c742c 100644
--- a/llvm/lib/Target/VE/VERegisterInfo.cpp
+++ b/llvm/lib/Target/VE/VERegisterInfo.cpp
@@ -133,179 +133,66 @@ static unsigned offsetToDisp(MachineInstr &MI) {
   return OffDisp;
 }
 
-class EliminateFrameIndex {
-  const TargetInstrInfo &TII;
-  const TargetRegisterInfo &TRI;
-  const DebugLoc &DL;
-  MachineBasicBlock &MBB;
-  MachineBasicBlock::iterator II;
-  Register clobber;
-
-  // Some helper functions for the ease of instruction building.
-  MachineFunction &getFunc() const { return *MBB.getParent(); }
-  inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const {
-    return TRI.getSubReg(Reg, Idx);
-  }
-  inline const MCInstrDesc &get(unsigned Opcode) const {
-    return TII.get(Opcode);
-  }
-  inline MachineInstrBuilder build(const MCInstrDesc &MCID, Register DestReg) {
-    return BuildMI(MBB, II, DL, MCID, DestReg);
-  }
-  inline MachineInstrBuilder build(unsigned InstOpc, Register DestReg) {
-    return build(get(InstOpc), DestReg);
-  }
-  inline MachineInstrBuilder build(const MCInstrDesc &MCID) {
-    return BuildMI(MBB, II, DL, MCID);
-  }
-  inline MachineInstrBuilder build(unsigned InstOpc) {
-    return build(get(InstOpc));
-  }
-
-  // Calculate an address of frame index from a frame register and a given
-  // offset if the offset doesn't fit in the immediate field.  Use a clobber
-  // register to hold calculated address.
-  void prepareReplaceFI(MachineInstr &MI, Register &FrameReg, int64_t &Offset,
-                        int64_t Bytes = 0);
-  // Replace the frame index in \p MI with a frame register and a given offset
-  // if it fits in the immediate field.  Otherwise, use pre-calculated address
-  // in a clobber regsiter.
-  void replaceFI(MachineInstr &MI, Register FrameReg, int64_t Offset,
-                 int FIOperandNum);
-
-  // Expand and eliminate Frame Index of pseudo STQrii and LDQrii.
-  void processSTQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
-                  int FIOperandNum);
-  void processLDQ(MachineInstr &MI, Register FrameReg, int64_t Offset,
-                  int FIOperandNum);
-
-public:
-  EliminateFrameIndex(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
-                      const DebugLoc &DL, MachineBasicBlock &MBB,
-                      MachineBasicBlock::iterator II)
-      : TII(TII), TRI(TRI), DL(DL), MBB(MBB), II(II), clobber(VE::SX13) {}
-
-  // Expand and eliminate Frame Index from MI
-  void processMI(MachineInstr &MI, Register FrameReg, int64_t Offset,
-                 int FIOperandNum);
-};
-
-// Prepare the frame index if it doesn't fit in the immediate field.  Use
-// clobber register to hold calculated address.
-void EliminateFrameIndex::prepareReplaceFI(MachineInstr &MI, Register &FrameReg,
-                                           int64_t &Offset, int64_t Bytes) {
-  if (isInt<32>(Offset) && isInt<32>(Offset + Bytes)) {
-    // If the offset is small enough to fit in the immediate field, directly
-    // encode it.  So, nothing to prepare here.
-    return;
-  }
-
-  // If the offset doesn't fit, emit following codes.  This clobbers SX13
-  // which we always know is available here.
-  //   lea     %clobber, Offset@lo
-  //   and     %clobber, %clobber, (32)0
-  //   lea.sl  %clobber, Offset@hi(FrameReg, %clobber)
-  build(VE::LEAzii, clobber).addImm(0).addImm(0).addImm(Lo_32(Offset));
-  build(VE::ANDrm, clobber).addReg(clobber).addImm(M0(32));
-  build(VE::LEASLrri, clobber)
-      .addReg(clobber)
-      .addReg(FrameReg)
-      .addImm(Hi_32(Offset));
-
-  // Use clobber register as a frame register and 0 offset
-  FrameReg = clobber;
-  Offset = 0;
-}
-
-// Replace the frame index in \p MI with a proper byte and framereg offset.
-void EliminateFrameIndex::replaceFI(MachineInstr &MI, Register FrameReg,
-                                    int64_t Offset, int FIOperandNum) {
-  assert(isInt<32>(Offset));
-
-  // The offset must be small enough to fit in the immediate field after
-  // call of prepareReplaceFI.  Therefore, we directly encode it.
+static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II,
+                      MachineInstr &MI, const DebugLoc &dl,
+                      unsigned FIOperandNum, int Offset, Register FrameReg) {
+  // Replace frame index with a frame pointer reference directly.
+  // VE has 32 bit offset field, so no need to expand a target instruction.
+  // Directly encode it.
   MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false);
   MI.getOperand(FIOperandNum + offsetToDisp(MI)).ChangeToImmediate(Offset);
 }
 
-void EliminateFrameIndex::processSTQ(MachineInstr &MI, Register FrameReg,
-                                     int64_t Offset, int FIOperandNum) {
-  assert(MI.getOpcode() == VE::STQrii);
-  LLVM_DEBUG(dbgs() << "processSTQ: "; MI.dump());
-
-  prepareReplaceFI(MI, FrameReg, Offset, 8);
-
-  Register SrcReg = MI.getOperand(3).getReg();
-  Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
-  Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
-  // VE stores HiReg to 8(addr) and LoReg to 0(addr)
-  MachineInstr *StMI =
-      build(VE::STrii).addReg(FrameReg).addImm(0).addImm(0).addReg(SrcLoReg);
-  replaceFI(*StMI, FrameReg, Offset, 0);
-  // Mutate to 'hi' store.
-  MI.setDesc(get(VE::STrii));
-  MI.getOperand(3).setReg(SrcHiReg);
-  Offset += 8;
-  replaceFI(MI, FrameReg, Offset, FIOperandNum);
-}
-
-void EliminateFrameIndex::processLDQ(MachineInstr &MI, Register FrameReg,
-                                     int64_t Offset, int FIOperandNum) {
-  assert(MI.getOpcode() == VE::LDQrii);
-  LLVM_DEBUG(dbgs() << "processLDQ: "; MI.dump());
-
-  prepareReplaceFI(MI, FrameReg, Offset, 8);
-
-  Register DestReg = MI.getOperand(0).getReg();
-  Register DestHiReg = getSubReg(DestReg, VE::sub_even);
-  Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
-  // VE loads HiReg from 8(addr) and LoReg from 0(addr)
-  MachineInstr *StMI =
-      build(VE::LDrii, DestLoReg).addReg(FrameReg).addImm(0).addImm(0);
-  replaceFI(*StMI, FrameReg, Offset, 1);
-  MI.setDesc(get(VE::LDrii));
-  MI.getOperand(0).setReg(DestHiReg);
-  Offset += 8;
-  replaceFI(MI, FrameReg, Offset, FIOperandNum);
-}
-
-void EliminateFrameIndex::processMI(MachineInstr &MI, Register FrameReg,
-                                    int64_t Offset, int FIOperandNum) {
-  switch (MI.getOpcode()) {
-  case VE::STQrii:
-    processSTQ(MI, FrameReg, Offset, FIOperandNum);
-    return;
-  case VE::LDQrii:
-    processLDQ(MI, FrameReg, Offset, FIOperandNum);
-    return;
-  }
-  prepareReplaceFI(MI, FrameReg, Offset);
-  replaceFI(MI, FrameReg, Offset, FIOperandNum);
-}
-
 void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
+  DebugLoc dl = MI.getDebugLoc();
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-
   MachineFunction &MF = *MI.getParent()->getParent();
-  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
-  const VEFrameLowering &TFI = *getFrameLowering(MF);
-  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
-  const VERegisterInfo &TRI = *Subtarget.getRegisterInfo();
-  DebugLoc DL = MI.getDebugLoc();
-  EliminateFrameIndex EFI(TII, TRI, DL, *MI.getParent(), II);
+  const VEFrameLowering *TFI = getFrameLowering(MF);
 
-  // Retrieve FrameReg and byte offset for stack slot.
   Register FrameReg;
-  int64_t Offset =
-      TFI.getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
+  int Offset;
+  Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg).getFixed();
+
   Offset += MI.getOperand(FIOperandNum + offsetToDisp(MI)).getImm();
 
-  EFI.processMI(MI, FrameReg, Offset, FIOperandNum);
+  if (MI.getOpcode() == VE::STQrii) {
+    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+    Register SrcReg = MI.getOperand(3).getReg();
+    Register SrcHiReg = getSubReg(SrcReg, VE::sub_even);
+    Register SrcLoReg = getSubReg(SrcReg, VE::sub_odd);
+    // VE stores HiReg to 8(addr) and LoReg to 0(addr)
+    MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii))
+                             .addReg(FrameReg)
+                             .addImm(0)
+                             .addImm(0)
+                             .addReg(SrcLoReg);
+    replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+    MI.setDesc(TII.get(VE::STrii));
+    MI.getOperand(3).setReg(SrcHiReg);
+    Offset += 8;
+  } else if (MI.getOpcode() == VE::LDQrii) {
+    const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+    Register DestReg = MI.getOperand(0).getReg();
+    Register DestHiReg = getSubReg(DestReg, VE::sub_even);
+    Register DestLoReg = getSubReg(DestReg, VE::sub_odd);
+    // VE loads HiReg from 8(addr) and LoReg from 0(addr)
+    MachineInstr *StMI =
+        BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg)
+            .addReg(FrameReg)
+            .addImm(0)
+            .addImm(0);
+    replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+    MI.setDesc(TII.get(VE::LDrii));
+    MI.getOperand(0).setReg(DestHiReg);
+    Offset += 8;
+  }
+
+  replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
 }
 
 Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const {

diff  --git a/llvm/test/CodeGen/VE/Scalar/load_stk.ll b/llvm/test/CodeGen/VE/Scalar/load_stk.ll
deleted file mode 100644
index 9ffab1464a992..0000000000000
--- a/llvm/test/CodeGen/VE/Scalar/load_stk.ll
+++ /dev/null
@@ -1,795 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=ve | FileCheck %s
-
-;;; Test store instructions
-;;;
-;;; Note:
-;;;   We test store instructions using general stack, stack with dynamic
-;;;   allocation, stack with dynamic allocation and alignment, and stack
-;;;   with dynamic allocation, alignment, and spill.
-;;;
-;;; Fist test using a stack for leaf function.
-;;;
-;;;   |                                              | Higher address
-;;;   |----------------------------------------------| <- old sp
-;;;   | Local variables of fixed size                |
-;;;   |----------------------------------------------| <- sp
-;;;   |                                              | Lower address
-;;;
-;;; Access local variable using sp (%s11).  In addition, please remember
-;;; that stack is aligned by 16 bytes.
-;;;
-;;; Second test using a general stack.
-;;;
-;;;   |                                              | Higher address
-;;;   |----------------------------------------------|
-;;;   | Parameter area for this function             |
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for this function   |
-;;;   |----------------------------------------------|
-;;;   | Return address for this function             |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for this function              |
-;;;   |----------------------------------------------| <- fp(=old sp)
-;;;   | Local variables of fixed size                |
-;;;   |----------------------------------------------|
-;;;   |.variable-sized.local.variables.(VLAs)........|
-;;;   |..............................................|
-;;;   |..............................................|
-;;;   |----------------------------------------------| <- returned by alloca
-;;;   | Parameter area for callee                    |
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for callee          |
-;;;   |----------------------------------------------|
-;;;   | Return address for callee                    |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for callee                     |
-;;;   |----------------------------------------------| <- sp
-;;;   |                                              | Lower address
-;;;
-;;; Access local variable using fp (%s9) since the size of VLA is not
-;;; known.  At the beginning of the functions, allocates 240 + data
-;;; bytes.  240 means RSA+RA+FP (=176) + Parameter (=64).
-;;;
-;;; Third test using a general stack.
-;;;
-;;;   |                                              | Higher address
-;;;   |----------------------------------------------|
-;;;   | Parameter area for this function             |
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for this function   |
-;;;   |----------------------------------------------|
-;;;   | Return address for this function             |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for this function              |
-;;;   |----------------------------------------------| <- fp(=old sp)
-;;;   |.empty.space.to.make.part.below.aligned.in....|
-;;;   |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
-;;;   |.alignment....................................|  unknown at compile time)
-;;;   |----------------------------------------------|
-;;;   | Local variables of fixed size including spill|
-;;;   | slots                                        |
-;;;   |----------------------------------------------| <- bp(not defined by ABI,
-;;;   |.variable-sized.local.variables.(VLAs)........|       LLVM chooses SX17)
-;;;   |..............................................| (size of this area is
-;;;   |..............................................|  unknown at compile time)
-;;;   |----------------------------------------------| <- stack top (returned by
-;;;   | Parameter area for callee                    |               alloca)
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for callee          |
-;;;   |----------------------------------------------|
-;;;   | Return address for callee                    |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for callee                     |
-;;;   |----------------------------------------------| <- sp
-;;;   |                                              | Lower address
-;;;
-;;; Access local variable using bp (%s17) since the size of alignment
-;;; and VLA are not known.  At the beginning of the functions, allocates
-;;; pad(240 + data + align) bytes.  Then, access data through bp + pad(240)
-;;; since this address doesn't change even if VLA is dynamically allocated.
-;;;
-;;; Fourth test using a general stack with some spills.
-;;;
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc i64 @loadi64_stk() {
-; CHECK-LABEL: loadi64_stk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    adds.l %s11, -16, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    ld %s0, 8(, %s11)
-; CHECK-NEXT:    adds.l %s11, 16, %s11
-; CHECK-NEXT:    b.l.t (, %s10)
-  %1 = alloca i64, align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
-  %2 = load volatile i64, ptr %1, align 8, !tbaa !3
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
-  ret i64 %2
-}
-
-; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-
-; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc i64 @loadi64_stk_big() {
-; CHECK-LABEL: loadi64_stk_big:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB1_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB1_4:
-; CHECK-NEXT:    ld %s0, 2147483640(, %s11)
-; CHECK-NEXT:    or %s1, 0, (0)1
-; CHECK-NEXT:    lea %s2, 2147483640
-; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ld %s3, (%s1, %s11)
-; CHECK-NEXT:    lea %s1, 8(, %s1)
-; CHECK-NEXT:    brne.l %s1, %s2, .LBB1_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483648
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %1 = alloca i64, align 8
-  %2 = alloca [268435455 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %2)
-  %3 = load volatile i64, ptr %1, align 8, !tbaa !3
-  br label %5
-
-4:                                                ; preds = %5
-  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
-  ret i64 %3
-
-5:                                                ; preds = %0, %5
-  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
-  %7 = getelementptr inbounds [268435455 x i64], ptr %2, i64 0, i64 %6
-  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %6, 1
-  %10 = icmp eq i64 %9, 268435455
-  br i1 %10, label %4, label %5, !llvm.loop !7
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc i64 @loadi64_stk_big2() {
-; CHECK-LABEL: loadi64_stk_big2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s13, 2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB2_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    lea %s13, -2147483640
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
-; CHECK-NEXT:    ld %s0, (, %s13)
-; CHECK-NEXT:    or %s1, 0, (0)1
-; CHECK-NEXT:    lea %s2, -2147483648
-; CHECK-NEXT:    and %s2, %s2, (32)0
-; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ld %s3, 8(%s1, %s11)
-; CHECK-NEXT:    lea %s1, 8(, %s1)
-; CHECK-NEXT:    brne.l %s1, %s2, .LBB2_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %1 = alloca i64, align 8
-  %2 = alloca [268435456 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
-  %3 = load volatile i64, ptr %1, align 8, !tbaa !3
-  br label %5
-
-4:                                                ; preds = %5
-  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %1)
-  ret i64 %3
-
-5:                                                ; preds = %0, %5
-  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
-  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
-  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %6, 1
-  %10 = icmp eq i64 %9, 268435456
-  br i1 %10, label %4, label %5, !llvm.loop !9
-}
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc i64 @loadi64_stk_dyn(i64 noundef %0) {
-; CHECK-LABEL: loadi64_stk_dyn:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -256(, %s11)
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB3_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB3_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s0, -8(, %s9)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca i64, align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
-  %3 = alloca i8, i64 %0, align 8
-  %4 = load volatile i64, ptr %3, align 8, !tbaa !3
-  %5 = load volatile i64, ptr %2, align 8, !tbaa !3
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
-  ret i64 %5
-}
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc i64 @loadi64_stk_dyn_align(i64 noundef %0) {
-; CHECK-LABEL: loadi64_stk_dyn_align:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB4_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s0, 256(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca i64, align 32
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
-  %3 = alloca i8, i64 %0, align 8
-  %4 = load volatile i64, ptr %3, align 8, !tbaa !3
-  %5 = load volatile i64, ptr %2, align 32, !tbaa !10
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
-  ret i64 %5
-}
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc i64 @loadi64_stk_dyn_align2(i64 noundef %0) {
-; CHECK-LABEL: loadi64_stk_dyn_align2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -320(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (58)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB5_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB5_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s0, 288(, %s17)
-; CHECK-NEXT:    ld %s1, 256(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca i64, align 32
-  %3 = alloca i64, align 64
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
-  %4 = alloca i8, i64 %0, align 8
-  %5 = load volatile i64, ptr %4, align 8, !tbaa !3
-  %6 = load volatile i64, ptr %2, align 32, !tbaa !10
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  %7 = load volatile i64, ptr %3, align 64, !tbaa !10
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
-  ret i64 %6
-}
-
-; Function Attrs: nounwind
-define x86_fastcallcc i64 @loadi64_stk_dyn_align_spill(i64 noundef %0) {
-; CHECK-LABEL: loadi64_stk_dyn_align_spill:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB6_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    or %s18, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s19, 256(, %s17)
-; CHECK-NEXT:    lea %s0, dummy@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, pass@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
-; CHECK-NEXT:    or %s0, 0, %s18
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    or %s0, 0, %s19
-; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca i64, align 32
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
-  %3 = alloca i8, i64 %0, align 8
-  %4 = load volatile i64, ptr %3, align 8, !tbaa !3
-  %5 = load volatile i64, ptr %2, align 32, !tbaa !10
-  tail call void (...) @dummy()
-  tail call void @pass(i64 noundef %0)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
-  ret i64 %5
-}
-
-declare void @dummy(...)
-
-declare void @pass(i64 noundef)
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc fp128 @loadquad_stk() {
-; CHECK-LABEL: loadquad_stk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    adds.l %s11, -16, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB7_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB7_2:
-; CHECK-NEXT:    ld %s1, (, %s11)
-; CHECK-NEXT:    ld %s0, 8(, %s11)
-; CHECK-NEXT:    adds.l %s11, 16, %s11
-; CHECK-NEXT:    b.l.t (, %s10)
-  %1 = alloca fp128, align 16
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
-  %2 = load volatile fp128, ptr %1, align 16, !tbaa !12
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
-  ret fp128 %2
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc fp128 @loadquad_stk_big() {
-; CHECK-LABEL: loadquad_stk_big:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s13, 2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB8_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB8_4:
-; CHECK-NEXT:    lea %s13, -2147483648
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
-; CHECK-NEXT:    ld %s1, (, %s13)
-; CHECK-NEXT:    ld %s0, 8(, %s13)
-; CHECK-NEXT:    or %s2, 0, (0)1
-; CHECK-NEXT:    lea %s3, 2147483640
-; CHECK-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ld %s4, 8(%s2, %s11)
-; CHECK-NEXT:    lea %s2, 8(, %s2)
-; CHECK-NEXT:    brne.l %s2, %s3, .LBB8_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %1 = alloca fp128, align 16
-  %2 = alloca [268435455 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %2)
-  %3 = load volatile fp128, ptr %1, align 16, !tbaa !12
-  br label %5
-
-4:                                                ; preds = %5
-  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
-  ret fp128 %3
-
-5:                                                ; preds = %0, %5
-  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
-  %7 = getelementptr inbounds [268435455 x i64], ptr %2, i64 0, i64 %6
-  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %6, 1
-  %10 = icmp eq i64 %9, 268435455
-  br i1 %10, label %4, label %5, !llvm.loop !14
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc fp128 @loadquad_stk_big2() {
-; CHECK-LABEL: loadquad_stk_big2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s13, 2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB9_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB9_4:
-; CHECK-NEXT:    lea %s13, -2147483648
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
-; CHECK-NEXT:    ld %s1, (, %s13)
-; CHECK-NEXT:    ld %s0, 8(, %s13)
-; CHECK-NEXT:    or %s2, 0, (0)1
-; CHECK-NEXT:    lea %s3, -2147483648
-; CHECK-NEXT:    and %s3, %s3, (32)0
-; CHECK-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ld %s4, (%s2, %s11)
-; CHECK-NEXT:    lea %s2, 8(, %s2)
-; CHECK-NEXT:    brne.l %s2, %s3, .LBB9_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %1 = alloca fp128, align 16
-  %2 = alloca [268435456 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %1)
-  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2)
-  %3 = load volatile fp128, ptr %1, align 16, !tbaa !12
-  br label %5
-
-4:                                                ; preds = %5
-  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %1)
-  ret fp128 %3
-
-5:                                                ; preds = %0, %5
-  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
-  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
-  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %6, 1
-  %10 = icmp eq i64 %9, 268435456
-  br i1 %10, label %4, label %5, !llvm.loop !15
-}
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc fp128 @loadquad_stk_dyn(i64 noundef %0) {
-; CHECK-LABEL: loadquad_stk_dyn:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -256(, %s11)
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB10_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB10_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s1, 8(, %s0)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s1, -16(, %s9)
-; CHECK-NEXT:    ld %s0, -8(, %s9)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca fp128, align 16
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
-  %3 = alloca i8, i64 %0, align 16
-  %4 = load volatile fp128, ptr %3, align 16, !tbaa !12
-  %5 = load volatile fp128, ptr %2, align 16, !tbaa !12
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
-  ret fp128 %5
-}
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc fp128 @loadquad_stk_dyn_align(i64 noundef %0) {
-; CHECK-LABEL: loadquad_stk_dyn_align:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB11_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB11_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s1, 8(, %s0)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s1, 256(, %s17)
-; CHECK-NEXT:    ld %s0, 264(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca fp128, align 32
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
-  %3 = alloca i8, i64 %0, align 16
-  %4 = load volatile fp128, ptr %3, align 16, !tbaa !12
-  %5 = load volatile fp128, ptr %2, align 32, !tbaa !16
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
-  ret fp128 %5
-}
-
-; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
-define x86_fastcallcc fp128 @loadquad_stk_dyn_align2(i64 noundef %0) {
-; CHECK-LABEL: loadquad_stk_dyn_align2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -320(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (58)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB12_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB12_2:
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s1, 8(, %s0)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s1, 288(, %s17)
-; CHECK-NEXT:    ld %s0, 296(, %s17)
-; CHECK-NEXT:    ld %s3, 256(, %s17)
-; CHECK-NEXT:    ld %s2, 264(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca fp128, align 32
-  %3 = alloca fp128, align 64
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
-  %4 = alloca i8, i64 %0, align 16
-  %5 = load volatile fp128, ptr %4, align 16, !tbaa !12
-  %6 = load volatile fp128, ptr %2, align 32, !tbaa !16
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  %7 = load volatile fp128, ptr %3, align 64, !tbaa !16
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
-  ret fp128 %6
-}
-
-; Function Attrs: nounwind
-define x86_fastcallcc fp128 @loadquad_stk_dyn_align_spill(i64 noundef %0) {
-; CHECK-LABEL: loadquad_stk_dyn_align_spill:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB13_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB13_2:
-; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s20, 64(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s21, 72(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    or %s18, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s0)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    ld %s1, 8(, %s0)
-; CHECK-NEXT:    ld %s0, (, %s0)
-; CHECK-NEXT:    ld %s21, 256(, %s17)
-; CHECK-NEXT:    ld %s20, 264(, %s17)
-; CHECK-NEXT:    lea %s0, dummy@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, pass@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
-; CHECK-NEXT:    or %s0, 0, %s18
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    or %s0, 0, %s20
-; CHECK-NEXT:    or %s1, 0, %s21
-; CHECK-NEXT:    ld %s21, 72(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s20, 64(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca fp128, align 32
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
-  %3 = alloca i8, i64 %0, align 16
-  %4 = load volatile fp128, ptr %3, align 16, !tbaa !12
-  %5 = load volatile fp128, ptr %2, align 32, !tbaa !16
-  tail call void (...) @dummy()
-  tail call void @pass(i64 noundef %0)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
-  ret fp128 %5
-}
-
-!3 = !{!4, !4, i64 0}
-!4 = !{!"long", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
-!7 = distinct !{!7, !8}
-!8 = !{!"llvm.loop.mustprogress"}
-!9 = distinct !{!9, !8}
-!10 = !{!11, !4, i64 0}
-!11 = !{!"", !4, i64 0}
-!12 = !{!13, !13, i64 0}
-!13 = !{!"long double", !5, i64 0}
-!14 = distinct !{!14, !8}
-!15 = distinct !{!15, !8}
-!16 = !{!17, !13, i64 0}
-!17 = !{!"", !13, i64 0}

diff  --git a/llvm/test/CodeGen/VE/Scalar/store_stk.ll b/llvm/test/CodeGen/VE/Scalar/store_stk.ll
deleted file mode 100644
index 76a3fda813620..0000000000000
--- a/llvm/test/CodeGen/VE/Scalar/store_stk.ll
+++ /dev/null
@@ -1,808 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=ve | FileCheck %s
-
-;;; Test store instructions
-;;;
-;;; Note:
-;;;   We test store instructions using general stack, stack with dynamic
-;;;   allocation, stack with dynamic allocation and alignment, and stack
-;;;   with dynamic allocation, alignment, and spill.
-;;;
-;;; First test using a stack for leaf function.
-;;;
-;;;   |                                              | Higher address
-;;;   |----------------------------------------------| <- old sp
-;;;   | Local variables of fixed size                |
-;;;   |----------------------------------------------| <- sp
-;;;   |                                              | Lower address
-;;;
-;;; Access local variable using sp (%s11).  In addition, please remember
-;;; that stack is aligned by 16 bytes.
-;;;
-;;; Second test using a general stack.
-;;;
-;;;   |                                              | Higher address
-;;;   |----------------------------------------------|
-;;;   | Parameter area for this function             |
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for this function   |
-;;;   |----------------------------------------------|
-;;;   | Return address for this function             |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for this function              |
-;;;   |----------------------------------------------| <- fp(=old sp)
-;;;   | Local variables of fixed size                |
-;;;   |----------------------------------------------|
-;;;   |.variable-sized.local.variables.(VLAs)........|
-;;;   |..............................................|
-;;;   |..............................................|
-;;;   |----------------------------------------------| <- returned by alloca
-;;;   | Parameter area for callee                    |
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for callee          |
-;;;   |----------------------------------------------|
-;;;   | Return address for callee                    |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for callee                     |
-;;;   |----------------------------------------------| <- sp
-;;;   |                                              | Lower address
-;;;
-;;; Access local variable using fp (%s9) since the size of VLA is not
-;;; known.  At the beginning of the functions, allocates 240 + data
-;;; bytes.  240 means RSA+RA+FP (=176) + Parameter (=64).
-;;;
-;;; Third test using a general stack.
-;;;
-;;;   |                                              | Higher address
-;;;   |----------------------------------------------|
-;;;   | Parameter area for this function             |
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for this function   |
-;;;   |----------------------------------------------|
-;;;   | Return address for this function             |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for this function              |
-;;;   |----------------------------------------------| <- fp(=old sp)
-;;;   |.empty.space.to.make.part.below.aligned.in....|
-;;;   |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is
-;;;   |.alignment....................................|  unknown at compile time)
-;;;   |----------------------------------------------|
-;;;   | Local variables of fixed size including spill|
-;;;   | slots                                        |
-;;;   |----------------------------------------------| <- bp(not defined by ABI,
-;;;   |.variable-sized.local.variables.(VLAs)........|       LLVM chooses SX17)
-;;;   |..............................................| (size of this area is
-;;;   |..............................................|  unknown at compile time)
-;;;   |----------------------------------------------| <- stack top (returned by
-;;;   | Parameter area for callee                    |               alloca)
-;;;   |----------------------------------------------|
-;;;   | Register save area (RSA) for callee          |
-;;;   |----------------------------------------------|
-;;;   | Return address for callee                    |
-;;;   |----------------------------------------------|
-;;;   | Frame pointer for callee                     |
-;;;   |----------------------------------------------| <- sp
-;;;   |                                              | Lower address
-;;;
-;;; Access local variable using bp (%s17) since the size of alignment
-;;; and VLA are not known.  At the beginning of the functions, allocates
-;;; pad(240 + data + align) bytes.  Then, access data through bp + pad(240)
-;;; since this address doesn't change even if VLA is dynamically allocated.
-;;;
-;;; Fourth test using a general stack with some spills.
-;;;
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storei64_stk(i64 noundef %0) {
-; CHECK-LABEL: storei64_stk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    adds.l %s11, -16, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB0_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    st %s0, 8(, %s11)
-; CHECK-NEXT:    adds.l %s11, 16, %s11
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca i64, align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
-  store volatile i64 %0, ptr %2, align 8, !tbaa !3
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
-  ret void
-}
-
-; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-
-; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storei64_stk_big(i64 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storei64_stk_big:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s11, -2147483648(, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB1_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB1_4:
-; CHECK-NEXT:    st %s0, 2147483640(, %s11)
-; CHECK-NEXT:    or %s0, 0, (0)1
-; CHECK-NEXT:    lea %s2, 2147483640
-; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    st %s1, (%s0, %s11)
-; CHECK-NEXT:    lea %s0, 8(, %s0)
-; CHECK-NEXT:    brne.l %s0, %s2, .LBB1_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483648
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca i64, align 8
-  %4 = alloca [268435455 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4)
-  store volatile i64 %0, ptr %3, align 8, !tbaa !3
-  br label %6
-
-5:                                                ; preds = %6
-  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret void
-
-6:                                                ; preds = %2, %6
-  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
-  %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7
-  store volatile i64 %1, ptr %8, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %7, 1
-  %10 = icmp eq i64 %9, 268435455
-  br i1 %10, label %5, label %6, !llvm.loop !7
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storei64_stk_big2(i64 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storei64_stk_big2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s13, 2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB2_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB2_4:
-; CHECK-NEXT:    lea %s13, -2147483640
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
-; CHECK-NEXT:    st %s0, (, %s13)
-; CHECK-NEXT:    or %s0, 0, (0)1
-; CHECK-NEXT:    lea %s2, -2147483648
-; CHECK-NEXT:    and %s2, %s2, (32)0
-; CHECK-NEXT:  .LBB2_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    st %s1, 8(%s0, %s11)
-; CHECK-NEXT:    lea %s0, 8(, %s0)
-; CHECK-NEXT:    brne.l %s0, %s2, .LBB2_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca i64, align 8
-  %4 = alloca [268435456 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
-  store volatile i64 %0, ptr %3, align 8, !tbaa !3
-  br label %6
-
-5:                                                ; preds = %6
-  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret void
-
-6:                                                ; preds = %2, %6
-  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
-  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
-  store volatile i64 %1, ptr %8, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %7, 1
-  %10 = icmp eq i64 %9, 268435456
-  br i1 %10, label %5, label %6, !llvm.loop !9
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storei64_stk_dyn(i64 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storei64_stk_dyn:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -256(, %s11)
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB3_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB3_2:
-; CHECK-NEXT:    or %s2, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s1)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    st %s2, (, %s0)
-; CHECK-NEXT:    st %s2, -8(, %s9)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca i64, align 8
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  %4 = alloca i8, i64 %1, align 8
-  store volatile i64 %0, ptr %4, align 8, !tbaa !3
-  store volatile i64 %0, ptr %3, align 8, !tbaa !3
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret void
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storei64_stk_dyn_align(i64 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storei64_stk_dyn_align:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB4_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB4_2:
-; CHECK-NEXT:    or %s2, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s1)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    st %s2, (, %s0)
-; CHECK-NEXT:    st %s2, 256(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca i64, align 32
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  %4 = alloca i8, i64 %1, align 8
-  store volatile i64 %0, ptr %4, align 8, !tbaa !3
-  store volatile i64 %0, ptr %3, align 32, !tbaa !10
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret void
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storei64_stk_dyn_align2(i64 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storei64_stk_dyn_align2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -320(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (58)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB5_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB5_2:
-; CHECK-NEXT:    or %s2, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s1)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    st %s2, (, %s0)
-; CHECK-NEXT:    st %s2, 288(, %s17)
-; CHECK-NEXT:    st %s2, 256(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca i64, align 32
-  %4 = alloca i64, align 64
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  %5 = alloca i8, i64 %1, align 8
-  store volatile i64 %0, ptr %5, align 8, !tbaa !3
-  store volatile i64 %0, ptr %3, align 32, !tbaa !10
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %4)
-  store volatile i64 %0, ptr %4, align 64, !tbaa !10
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4)
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret void
-}
-
-; Function Attrs: nounwind
-define x86_fastcallcc void @storei64_stk_dyn_align_spill(i64 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storei64_stk_dyn_align_spill:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB6_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB6_2:
-; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s20, 64(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    or %s18, 0, %s1
-; CHECK-NEXT:    or %s19, 0, %s0
-; CHECK-NEXT:    lea %s0, 15(, %s1)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s20, 240(, %s11)
-; CHECK-NEXT:    lea %s0, dummy@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, pass@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
-; CHECK-NEXT:    or %s0, 0, %s18
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    st %s19, (, %s20)
-; CHECK-NEXT:    st %s19, 256(, %s17)
-; CHECK-NEXT:    ld %s20, 64(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca i64, align 32
-  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3)
-  %4 = alloca i8, i64 %1, align 8
-  tail call void (...) @dummy()
-  tail call void @pass(i64 noundef %1)
-  store volatile i64 %0, ptr %4, align 8, !tbaa !3
-  store volatile i64 %0, ptr %3, align 32, !tbaa !10
-  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3)
-  ret void
-}
-
-declare void @dummy(...)
-
-declare void @pass(i64 noundef)
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storequad_stk(fp128 noundef %0) {
-; CHECK-LABEL: storequad_stk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    adds.l %s11, -16, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB7_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB7_2:
-; CHECK-NEXT:    st %s1, (, %s11)
-; CHECK-NEXT:    st %s0, 8(, %s11)
-; CHECK-NEXT:    adds.l %s11, 16, %s11
-; CHECK-NEXT:    b.l.t (, %s10)
-  %2 = alloca fp128, align 16
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %2)
-  store volatile fp128 %0, ptr %2, align 16, !tbaa !12
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %2)
-  ret void
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storequad_stk_big(fp128 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storequad_stk_big:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s13, 2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB8_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB8_4:
-; CHECK-NEXT:    lea %s13, -2147483648
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
-; CHECK-NEXT:    st %s1, (, %s13)
-; CHECK-NEXT:    st %s0, 8(, %s13)
-; CHECK-NEXT:    or %s0, 0, (0)1
-; CHECK-NEXT:    lea %s1, 2147483640
-; CHECK-NEXT:  .LBB8_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    st %s2, 8(%s0, %s11)
-; CHECK-NEXT:    lea %s0, 8(, %s0)
-; CHECK-NEXT:    brne.l %s0, %s1, .LBB8_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca fp128, align 16
-  %4 = alloca [268435455 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4)
-  store volatile fp128 %0, ptr %3, align 16, !tbaa !12
-  br label %6
-
-5:                                                ; preds = %6
-  call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  ret void
-
-6:                                                ; preds = %2, %6
-  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
-  %8 = getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7
-  store volatile i64 %1, ptr %8, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %7, 1
-  %10 = icmp eq i64 %9, 268435455
-  br i1 %10, label %5, label %6, !llvm.loop !14
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storequad_stk_big2(fp128 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storequad_stk_big2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lea %s13, 2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, -1(%s13, %s11)
-; CHECK-NEXT:    brge.l %s11, %s8, .LBB9_4
-; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB9_4:
-; CHECK-NEXT:    lea %s13, -2147483648
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s13, (%s11, %s13)
-; CHECK-NEXT:    st %s1, (, %s13)
-; CHECK-NEXT:    st %s0, 8(, %s13)
-; CHECK-NEXT:    or %s0, 0, (0)1
-; CHECK-NEXT:    lea %s1, -2147483648
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:  .LBB9_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    st %s2, (%s0, %s11)
-; CHECK-NEXT:    lea %s0, 8(, %s0)
-; CHECK-NEXT:    brne.l %s0, %s1, .LBB9_1
-; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    lea %s13, -2147483632
-; CHECK-NEXT:    and %s13, %s13, (32)0
-; CHECK-NEXT:    lea.sl %s11, (%s13, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca fp128, align 16
-  %4 = alloca [268435456 x i64], align 8
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
-  store volatile fp128 %0, ptr %3, align 16, !tbaa !12
-  br label %6
-
-5:                                                ; preds = %6
-  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  ret void
-
-6:                                                ; preds = %2, %6
-  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
-  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
-  store volatile i64 %1, ptr %8, align 8, !tbaa !3
-  %9 = add nuw nsw i64 %7, 1
-  %10 = icmp eq i64 %9, 268435456
-  br i1 %10, label %5, label %6, !llvm.loop !15
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storequad_stk_dyn(fp128 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storequad_stk_dyn:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -256(, %s11)
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB10_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB10_2:
-; CHECK-NEXT:    or %s4, 0, %s0
-; CHECK-NEXT:    or %s5, 0, %s1
-; CHECK-NEXT:    lea %s0, 15(, %s2)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    st %s4, 8(, %s0)
-; CHECK-NEXT:    st %s5, (, %s0)
-; CHECK-NEXT:    st %s5, -16(, %s9)
-; CHECK-NEXT:    st %s4, -8(, %s9)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca fp128, align 16
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  %4 = alloca i8, i64 %1, align 16
-  store volatile fp128 %0, ptr %4, align 16, !tbaa !12
-  store volatile fp128 %0, ptr %3, align 16, !tbaa !12
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  ret void
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storequad_stk_dyn_align(fp128 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storequad_stk_dyn_align:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB11_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB11_2:
-; CHECK-NEXT:    or %s4, 0, %s0
-; CHECK-NEXT:    or %s5, 0, %s1
-; CHECK-NEXT:    lea %s0, 15(, %s2)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    st %s4, 8(, %s0)
-; CHECK-NEXT:    st %s5, (, %s0)
-; CHECK-NEXT:    st %s5, 256(, %s17)
-; CHECK-NEXT:    st %s4, 264(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca fp128, align 32
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  %4 = alloca i8, i64 %1, align 16
-  store volatile fp128 %0, ptr %4, align 16, !tbaa !12
-  store volatile fp128 %0, ptr %3, align 32, !tbaa !16
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  ret void
-}
-
-; Function Attrs: argmemonly nofree nounwind
-define x86_fastcallcc void @storequad_stk_dyn_align2(fp128 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storequad_stk_dyn_align2:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -320(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (58)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB12_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB12_2:
-; CHECK-NEXT:    or %s4, 0, %s0
-; CHECK-NEXT:    or %s5, 0, %s1
-; CHECK-NEXT:    lea %s0, 15(, %s2)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, 240(, %s11)
-; CHECK-NEXT:    st %s4, 8(, %s0)
-; CHECK-NEXT:    st %s5, (, %s0)
-; CHECK-NEXT:    st %s5, 288(, %s17)
-; CHECK-NEXT:    st %s4, 296(, %s17)
-; CHECK-NEXT:    st %s5, 256(, %s17)
-; CHECK-NEXT:    st %s4, 264(, %s17)
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca fp128, align 32
-  %4 = alloca fp128, align 64
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  %5 = alloca i8, i64 %1, align 16
-  store volatile fp128 %0, ptr %5, align 16, !tbaa !12
-  store volatile fp128 %0, ptr %3, align 32, !tbaa !16
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %4)
-  store volatile fp128 %0, ptr %4, align 64, !tbaa !16
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %4)
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  ret void
-}
-
-; Function Attrs: nounwind
-define x86_fastcallcc void @storequad_stk_dyn_align_spill(fp128 noundef %0, i64 noundef %1) {
-; CHECK-LABEL: storequad_stk_dyn_align_spill:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    st %s9, (, %s11)
-; CHECK-NEXT:    st %s10, 8(, %s11)
-; CHECK-NEXT:    st %s17, 40(, %s11)
-; CHECK-NEXT:    or %s9, 0, %s11
-; CHECK-NEXT:    lea %s11, -288(, %s11)
-; CHECK-NEXT:    and %s11, %s11, (59)1
-; CHECK-NEXT:    or %s17, 0, %s11
-; CHECK-NEXT:    brge.l.t %s11, %s8, .LBB13_2
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    ld %s61, 24(, %s14)
-; CHECK-NEXT:    or %s62, 0, %s0
-; CHECK-NEXT:    lea %s63, 315
-; CHECK-NEXT:    shm.l %s63, (%s61)
-; CHECK-NEXT:    shm.l %s8, 8(%s61)
-; CHECK-NEXT:    shm.l %s11, 16(%s61)
-; CHECK-NEXT:    monc
-; CHECK-NEXT:    or %s0, 0, %s62
-; CHECK-NEXT:  .LBB13_2:
-; CHECK-NEXT:    st %s18, 48(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s19, 56(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s20, 64(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    st %s21, 72(, %s9) # 8-byte Folded Spill
-; CHECK-NEXT:    or %s18, 0, %s2
-; CHECK-NEXT:    or %s20, 0, %s0
-; CHECK-NEXT:    or %s21, 0, %s1
-; CHECK-NEXT:    lea %s0, 15(, %s2)
-; CHECK-NEXT:    and %s0, -16, %s0
-; CHECK-NEXT:    lea %s1, __ve_grow_stack@lo
-; CHECK-NEXT:    and %s1, %s1, (32)0
-; CHECK-NEXT:    lea.sl %s12, __ve_grow_stack@hi(, %s1)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s19, 240(, %s11)
-; CHECK-NEXT:    lea %s0, dummy@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, dummy@hi(, %s0)
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    lea %s0, pass@lo
-; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    lea.sl %s12, pass@hi(, %s0)
-; CHECK-NEXT:    or %s0, 0, %s18
-; CHECK-NEXT:    bsic %s10, (, %s12)
-; CHECK-NEXT:    st %s20, 8(, %s19)
-; CHECK-NEXT:    st %s21, (, %s19)
-; CHECK-NEXT:    st %s21, 256(, %s17)
-; CHECK-NEXT:    st %s20, 264(, %s17)
-; CHECK-NEXT:    ld %s21, 72(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s20, 64(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s19, 56(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    ld %s18, 48(, %s9) # 8-byte Folded Reload
-; CHECK-NEXT:    or %s11, 0, %s9
-; CHECK-NEXT:    ld %s17, 40(, %s11)
-; CHECK-NEXT:    ld %s10, 8(, %s11)
-; CHECK-NEXT:    ld %s9, (, %s11)
-; CHECK-NEXT:    b.l.t (, %s10)
-  %3 = alloca fp128, align 32
-  call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %3)
-  %4 = alloca i8, i64 %1, align 16
-  tail call void (...) @dummy()
-  tail call void @pass(i64 noundef %1)
-  store volatile fp128 %0, ptr %4, align 16, !tbaa !12
-  store volatile fp128 %0, ptr %3, align 32, !tbaa !16
-  call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %3)
-  ret void
-}
-
-!3 = !{!4, !4, i64 0}
-!4 = !{!"long", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
-!7 = distinct !{!7, !8}
-!8 = !{!"llvm.loop.mustprogress"}
-!9 = distinct !{!9, !8}
-!10 = !{!11, !4, i64 0}
-!11 = !{!"", !4, i64 0}
-!12 = !{!13, !13, i64 0}
-!13 = !{!"long double", !5, i64 0}
-!14 = distinct !{!14, !8}
-!15 = distinct !{!15, !8}
-!16 = !{!17, !13, i64 0}
-!17 = !{!"", !13, i64 0}


        


More information about the llvm-commits mailing list