[Lldb-commits] [lldb] 55f9f87 - Reapply Revert "RegAllocFast: Rewrite and improve"

Matt Arsenault via lldb-commits lldb-commits at lists.llvm.org
Mon Sep 21 12:45:44 PDT 2020


Author: Matt Arsenault
Date: 2020-09-21T15:45:27-04:00
New Revision: 55f9f87da2c2ad791b9e62cccb1c035e037444fa

URL: https://github.com/llvm/llvm-project/commit/55f9f87da2c2ad791b9e62cccb1c035e037444fa
DIFF: https://github.com/llvm/llvm-project/commit/55f9f87da2c2ad791b9e62cccb1c035e037444fa.diff

LOG: Reapply Revert "RegAllocFast: Rewrite and improve"

This reverts commit dbd53a1f0c939a55e7719c39d08179468f9ad3dc.

Needed lldb test updates

Added: 
    llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
    llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir
    llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
    llvm/test/CodeGen/PowerPC/spill-nor0.mir
    llvm/test/CodeGen/X86/bug47278-eflags-error.mir
    llvm/test/CodeGen/X86/bug47278.mir

Modified: 
    lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
    llvm/lib/CodeGen/RegAllocFast.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
    llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
    llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
    llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
    llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
    llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
    llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
    llvm/test/CodeGen/AArch64/arm64_32-null.ll
    llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
    llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
    llvm/test/CodeGen/AArch64/combine-loads.ll
    llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
    llvm/test/CodeGen/AArch64/popcount.ll
    llvm/test/CodeGen/AArch64/swift-return.ll
    llvm/test/CodeGen/AArch64/swifterror.ll
    llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
    llvm/test/CodeGen/AArch64/unwind-preserved.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
    llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
    llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
    llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
    llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
    llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
    llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
    llvm/test/CodeGen/AMDGPU/spill-agpr.mir
    llvm/test/CodeGen/AMDGPU/spill-m0.ll
    llvm/test/CodeGen/AMDGPU/spill192.mir
    llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
    llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
    llvm/test/CodeGen/ARM/Windows/alloca.ll
    llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
    llvm/test/CodeGen/ARM/cmpxchg-O0.ll
    llvm/test/CodeGen/ARM/crash-greedy-v6.ll
    llvm/test/CodeGen/ARM/debug-info-blocks.ll
    llvm/test/CodeGen/ARM/fast-isel-call.ll
    llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
    llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
    llvm/test/CodeGen/ARM/fast-isel-select.ll
    llvm/test/CodeGen/ARM/fast-isel-vararg.ll
    llvm/test/CodeGen/ARM/ldrd.ll
    llvm/test/CodeGen/ARM/legalize-bitcast.ll
    llvm/test/CodeGen/ARM/pr47454.ll
    llvm/test/CodeGen/ARM/stack-guard-reassign.ll
    llvm/test/CodeGen/ARM/swifterror.ll
    llvm/test/CodeGen/ARM/thumb-big-stack.ll
    llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
    llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
    llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
    llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll
    llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll
    llvm/test/CodeGen/Mips/atomic-min-max.ll
    llvm/test/CodeGen/Mips/atomic.ll
    llvm/test/CodeGen/Mips/atomic64.ll
    llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
    llvm/test/CodeGen/Mips/copy-fp64.ll
    llvm/test/CodeGen/Mips/implicit-sret.ll
    llvm/test/CodeGen/Mips/micromips-eva.mir
    llvm/test/CodeGen/Mips/msa/ldr_str.ll
    llvm/test/CodeGen/PowerPC/addegluecrash.ll
    llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll
    llvm/test/CodeGen/PowerPC/aix-overflow-toc.py
    llvm/test/CodeGen/PowerPC/anon_aggr.ll
    llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
    llvm/test/CodeGen/PowerPC/elf-common.ll
    llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll
    llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
    llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll
    llvm/test/CodeGen/PowerPC/fp64-to-int16.ll
    llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
    llvm/test/CodeGen/PowerPC/popcount.ll
    llvm/test/CodeGen/PowerPC/spill-nor0.ll
    llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll
    llvm/test/CodeGen/PowerPC/vsx-args.ll
    llvm/test/CodeGen/PowerPC/vsx.ll
    llvm/test/CodeGen/SPARC/fp16-promote.ll
    llvm/test/CodeGen/SystemZ/swift-return.ll
    llvm/test/CodeGen/SystemZ/swifterror.ll
    llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll
    llvm/test/CodeGen/Thumb2/high-reg-spill.mir
    llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
    llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
    llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
    llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
    llvm/test/CodeGen/X86/atomic-monotonic.ll
    llvm/test/CodeGen/X86/atomic-unordered.ll
    llvm/test/CodeGen/X86/atomic32.ll
    llvm/test/CodeGen/X86/atomic64.ll
    llvm/test/CodeGen/X86/atomic6432.ll
    llvm/test/CodeGen/X86/avx-load-store.ll
    llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll
    llvm/test/CodeGen/X86/crash-O0.ll
    llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll
    llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
    llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
    llvm/test/CodeGen/X86/fast-isel-select-sse.ll
    llvm/test/CodeGen/X86/fast-isel-select.ll
    llvm/test/CodeGen/X86/fast-isel-x86-64.ll
    llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll
    llvm/test/CodeGen/X86/mixed-ptr-sizes.ll
    llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
    llvm/test/CodeGen/X86/pr11415.ll
    llvm/test/CodeGen/X86/pr1489.ll
    llvm/test/CodeGen/X86/pr27591.ll
    llvm/test/CodeGen/X86/pr30430.ll
    llvm/test/CodeGen/X86/pr30813.ll
    llvm/test/CodeGen/X86/pr32241.ll
    llvm/test/CodeGen/X86/pr32284.ll
    llvm/test/CodeGen/X86/pr32340.ll
    llvm/test/CodeGen/X86/pr32345.ll
    llvm/test/CodeGen/X86/pr32451.ll
    llvm/test/CodeGen/X86/pr32484.ll
    llvm/test/CodeGen/X86/pr34592.ll
    llvm/test/CodeGen/X86/pr34653.ll
    llvm/test/CodeGen/X86/pr39733.ll
    llvm/test/CodeGen/X86/pr42452.ll
    llvm/test/CodeGen/X86/pr44749.ll
    llvm/test/CodeGen/X86/pr47000.ll
    llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir
    llvm/test/CodeGen/X86/stack-protector-msvc.ll
    llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll
    llvm/test/CodeGen/X86/swift-return.ll
    llvm/test/CodeGen/X86/swifterror.ll
    llvm/test/CodeGen/X86/volatile.ll
    llvm/test/CodeGen/X86/win64_eh.ll
    llvm/test/CodeGen/X86/x86-32-intrcc.ll
    llvm/test/CodeGen/X86/x86-64-intrcc.ll
    llvm/test/DebugInfo/AArch64/frameindices.ll
    llvm/test/DebugInfo/AArch64/prologue_end.ll
    llvm/test/DebugInfo/ARM/prologue_end.ll
    llvm/test/DebugInfo/Mips/delay-slot.ll
    llvm/test/DebugInfo/Mips/prologue_end.ll
    llvm/test/DebugInfo/X86/dbg-declare-arg.ll
    llvm/test/DebugInfo/X86/fission-ranges.ll
    llvm/test/DebugInfo/X86/op_deref.ll
    llvm/test/DebugInfo/X86/parameters.ll
    llvm/test/DebugInfo/X86/pieces-1.ll
    llvm/test/DebugInfo/X86/prologue-stack.ll
    llvm/test/DebugInfo/X86/reference-argument.ll
    llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll
    llvm/test/DebugInfo/X86/sret.ll
    llvm/test/DebugInfo/X86/subreg.ll

Removed: 
    


################################################################################
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
index be0575541a62..8d101ba280e8 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/disassembly.cpp
@@ -28,11 +28,9 @@ int main(int argc, char **argv) {
 // CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov    dword ptr [rsp + 0x24], ecx
 // CHECK:      ** 15     foo();
 // CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+21>: call   {{.*}}               ; foo at disassembly.cpp:12
-// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor    ecx, ecx
-// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+28>: mov    dword ptr [rsp + 0x20], eax
+// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor    eax, eax
 // CHECK:      ** 16     return 0;
 // CHECK-NEXT:    17   }
 // CHECK-NEXT:    18
-// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+32>: mov    eax, ecx
-// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+34>: add    rsp, 0x38
-// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+38>: ret
+// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+28>: add    rsp, 0x38
+// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+32>: ret

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 68308c6e1d4b..cfee1a77d6b8 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -56,6 +56,10 @@ STATISTIC(NumStores, "Number of stores added");
 STATISTIC(NumLoads , "Number of loads added");
 STATISTIC(NumCoalesced, "Number of copies coalesced");
 
+// FIXME: Remove this switch when all testcases are fixed!
+static cl::opt<bool> IgnoreMissingDefs("rafast-ignore-missing-defs",
+                                       cl::Hidden);
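+// In this patch the flag only gates the assertion in reloadAtBegin(): a
+// reload inserted into the entry block (normally a sign of a vreg use
+// without any def) is tolerated instead of asserting.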
+
 static RegisterRegAlloc
   fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
 
@@ -85,8 +89,9 @@ namespace {
       MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
       Register VirtReg;                ///< Virtual register number.
       MCPhysReg PhysReg = 0;           ///< Currently held here.
-      unsigned short LastOpNum = 0;    ///< OpNum on LastUse.
-      bool Dirty = false;              ///< Register needs spill.
+      bool LiveOut = false;            ///< Register is possibly live out.
+      bool Reloaded = false;           ///< Register was reloaded.
+      bool Error = false;              ///< Could not allocate.
 
       explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
 
@@ -101,6 +106,9 @@ namespace {
     LiveRegMap LiveVirtRegs;
 
     DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+    /// List of DBG_VALUEs that we encountered without the vreg being assigned
+    /// because they were placed after the last use of the vreg.
+    DenseMap<unsigned, SmallVector<MachineInstr *, 1>> DanglingDbgValues;
 
     /// Has a bit set for every virtual register for which it was determined
     /// that it is alive across blocks.
@@ -112,9 +120,13 @@ namespace {
       /// immediately without checking aliases.
       regFree,
 
-      /// A reserved register has been assigned explicitly (e.g., setting up a
-      /// call parameter), and it remains reserved until it is used.
-      regReserved
+      /// A pre-assigned register has been assigned before register allocation
+      /// (e.g., setting up a call parameter).
+      regPreAssigned,
+
+      /// Used temporarily in reloadAtBegin() to mark register units that are
+      /// live-in to the basic block.
+      regLiveIn,
 
      /// A register state may also be a virtual register number, indicating
       /// that the physical register is currently allocated to a virtual
@@ -124,15 +136,17 @@ namespace {
     /// Maps each physical register to a RegUnitState enum or virtual register.
     std::vector<unsigned> RegUnitStates;
 
-    SmallVector<Register, 16> VirtDead;
     SmallVector<MachineInstr *, 32> Coalesced;
 
     using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
     /// Set of register units that are used in the current instruction, and so
     /// cannot be allocated.
     RegUnitSet UsedInInstr;
+    RegUnitSet PhysRegUses;
+    SmallVector<uint16_t, 8> DefOperandIndexes;
 
     void setPhysRegState(MCPhysReg PhysReg, unsigned NewState);
+    bool isPhysRegFree(MCPhysReg PhysReg) const;
 
     /// Mark a physreg as used in this instruction.
     void markRegUsedInInstr(MCPhysReg PhysReg) {
@@ -141,13 +155,29 @@ namespace {
     }
 
     /// Check if a physreg or any of its aliases are used in this instruction.
-    bool isRegUsedInInstr(MCPhysReg PhysReg) const {
-      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+    bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
+      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
         if (UsedInInstr.count(*Units))
           return true;
+        if (LookAtPhysRegUses && PhysRegUses.count(*Units))
+          return true;
+      }
       return false;
     }
 
+    /// Mark physical register as being used in a register use operand.
+    /// This is only used by the special livethrough handling code.
+    void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
+      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+        PhysRegUses.insert(*Units);
+    }
+
+    /// Remove mark of physical register being used in the instruction.
+    void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
+      for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
+        UsedInInstr.erase(*Units);
+    }
+
     enum : unsigned {
       spillClean = 50,
       spillDirty = 100,
@@ -177,27 +207,21 @@ namespace {
     bool runOnMachineFunction(MachineFunction &MF) override;
 
     void allocateBasicBlock(MachineBasicBlock &MBB);
+
+    void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+                              Register Reg) const;
+
     void allocateInstruction(MachineInstr &MI);
     void handleDebugValue(MachineInstr &MI);
-    void handleThroughOperands(MachineInstr &MI,
-                               SmallVectorImpl<Register> &VirtDead);
-    bool isLastUseOfLocalReg(const MachineOperand &MO) const;
-
-    void addKillFlag(const LiveReg &LRI);
 #ifndef NDEBUG
     bool verifyRegStateMapping(const LiveReg &LR) const;
 #endif
+    bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+    bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+    bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
+    void freePhysReg(MCPhysReg PhysReg);
 
-    void killVirtReg(LiveReg &LR);
-    void killVirtReg(Register VirtReg);
-    void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
-    void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg);
-
-    void usePhysReg(MachineOperand &MO);
-    void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
-                       unsigned NewState);
     unsigned calcSpillCost(MCPhysReg PhysReg) const;
-    void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
 
     LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
       return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
@@ -207,14 +231,24 @@ namespace {
       return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
     }
 
-    void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint);
+    void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg);
+    void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint,
+                      bool LookAtPhysRegUses = false);
     void allocVirtRegUndef(MachineOperand &MO);
-    MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
-                            Register Hint);
-    LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
-                           Register Hint);
-    void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
-    bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+    void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
+                                   MCPhysReg Reg);
+    void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+                                  Register VirtReg);
+    void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+                       bool LookAtPhysRegUses = false);
+    void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
+
+    MachineBasicBlock::iterator
+    getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
+                              SmallSet<Register, 2> &PrologLiveIns) const;
+
+    void reloadAtBegin(MachineBasicBlock &MBB);
+    void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
 
     Register traceCopies(Register VirtReg) const;
     Register traceCopyChain(Register Reg) const;
@@ -243,6 +277,14 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
     RegUnitStates[*UI] = NewState;
 }
 
+bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
+  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+    if (RegUnitStates[*UI] != regFree)
+      return false;
+  }
+  return true;
+}
+
 /// This allocates space for the specified virtual register to be held on the
 /// stack.
 int RegAllocFast::getStackSpaceFor(Register VirtReg) {
@@ -300,7 +342,7 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) {
   // block.
   static const unsigned Limit = 8;
   unsigned C = 0;
-  for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
+  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
     if (UseInst.getParent() != MBB || ++C >= Limit) {
       MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
       // Cannot be live-out if there are no successors.
@@ -352,15 +394,19 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
   TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI);
   ++NumStores;
 
-  // If this register is used by DBG_VALUE then insert new DBG_VALUE to
-  // identify spilled location as the place to find corresponding variable's
-  // value.
+  // When we spill a virtual register, we will have spill instructions behind
+  // every definition of it, meaning we can switch all the DBG_VALUEs over
+  // to just reference the stack slot.
   SmallVectorImpl<MachineInstr *> &LRIDbgValues = LiveDbgValueMap[VirtReg];
   for (MachineInstr *DBG : LRIDbgValues) {
     MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI);
     assert(NewDV->getParent() == MBB && "dangling parent pointer");
     (void)NewDV;
     LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV);
+    // Rewrite unassigned dbg_values to use the stack slot.
+    MachineOperand &MO = DBG->getOperand(0);
+    if (MO.isReg() && MO.getReg() == 0)
+      updateDbgValueForSpill(*DBG, FI);
   }
  // Now that this register is spilled there should not be any DBG_VALUE
  // pointing to this register because they are all pointing to spilled value
@@ -379,113 +425,75 @@ void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
   ++NumLoads;
 }
 
-/// Return true if MO is the only remaining reference to its virtual register,
-/// and it is guaranteed to be a block-local register.
-bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const {
-  // If the register has ever been spilled or reloaded, we conservatively assume
-  // it is a global register used in multiple blocks.
-  if (StackSlotForVirtReg[MO.getReg()] != -1)
-    return false;
-
-  // Check that the use/def chain has exactly one operand - MO.
-  MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg());
-  if (&*I != &MO)
-    return false;
-  return ++I == MRI->reg_nodbg_end();
-}
-
-/// Set kill flags on last use of a virtual register.
-void RegAllocFast::addKillFlag(const LiveReg &LR) {
-  if (!LR.LastUse) return;
-  MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
-  if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
-    if (MO.getReg() == LR.PhysReg)
-      MO.setIsKill();
-    // else, don't do anything we are problably redefining a
-    // subreg of this register and given we don't track which
-    // lanes are actually dead, we cannot insert a kill flag here.
-    // Otherwise we may end up in a situation like this:
-    // ... = (MO) physreg:sub1, implicit killed physreg
-    // ... <== Here we would allow later pass to reuse physreg:sub1
-    //         which is potentially wrong.
-    // LR:sub0 = ...
-    // ... = LR.sub1 <== This is going to use physreg:sub1
-  }
-}
-
-#ifndef NDEBUG
-bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
-  for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
-    if (RegUnitStates[*UI] != LR.VirtReg)
-      return false;
-  }
+/// Get basic block begin insertion point.
+/// This is not just MBB.begin() because surprisingly we have EH_LABEL
+/// instructions marking the beginning of a basic block. This means we must
+/// insert new instructions after such labels...
+/// new instructions after such labels...
+MachineBasicBlock::iterator
+RegAllocFast::getMBBBeginInsertionPoint(
+  MachineBasicBlock &MBB, SmallSet<Register, 2> &PrologLiveIns) const {
+  MachineBasicBlock::iterator I = MBB.begin();
+  while (I != MBB.end()) {
+    if (I->isLabel()) {
+      ++I;
+      continue;
+    }
 
-  return true;
-}
-#endif
+    // Most reloads should be inserted after prolog instructions.
+    if (!TII->isBasicBlockPrologue(*I))
+      break;
 
-/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(LiveReg &LR) {
-  assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
-  addKillFlag(LR);
-  MCPhysReg PhysReg = LR.PhysReg;
-  setPhysRegState(PhysReg, regFree);
-  LR.PhysReg = 0;
-}
+    // However if a prolog instruction reads a register that needs to be
+    // reloaded, the reload should be inserted before the prolog.
+    for (MachineOperand &MO : I->operands()) {
+      if (MO.isReg())
+        PrologLiveIns.insert(MO.getReg());
+    }
 
-/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(Register VirtReg) {
-  assert(Register::isVirtualRegister(VirtReg) &&
-         "killVirtReg needs a virtual register");
-  LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
-  if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
-    killVirtReg(*LRI);
-}
+    ++I;
+  }
 
-/// This method spills the value specified by VirtReg into the corresponding
-/// stack slot if needed.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
-                                Register VirtReg) {
-  assert(Register::isVirtualRegister(VirtReg) &&
-         "Spilling a physical register is illegal!");
-  LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
-  assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
-         "Spilling unmapped virtual register");
-  spillVirtReg(MI, *LRI);
+  return I;
 }
 
-/// Do the actual work of spilling.
-void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
-  assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
-
-  MCPhysReg PhysReg = LR.PhysReg;
+/// Reload all currently assigned virtual registers.
+void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
+  if (LiveVirtRegs.empty())
+    return;
 
-  if (LR.Dirty) {
-    // If this physreg is used by the instruction, we want to kill it on the
-    // instruction, not on the spill.
-    bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
-    LR.Dirty = false;
+  for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+    MCPhysReg Reg = P.PhysReg;
+    // Set state to live-in. This possibly overrides mappings to virtual
+    // registers but we don't care anymore at this point.
+    setPhysRegState(Reg, regLiveIn);
+  }
 
-    spill(MI, LR.VirtReg, PhysReg, SpillKill);
 
-    if (SpillKill)
-      LR.LastUse = nullptr; // Don't kill register again
-  }
-  killVirtReg(LR);
-}
+  SmallSet<Register, 2> PrologLiveIns;
 
-/// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
-  if (LiveVirtRegs.empty())
-    return;
   // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
   // of reloading here is deterministic, if arbitrary.
-  for (LiveReg &LR : LiveVirtRegs) {
-    if (!LR.PhysReg)
+  MachineBasicBlock::iterator InsertBefore
+    = getMBBBeginInsertionPoint(MBB, PrologLiveIns);
+  for (const LiveReg &LR : LiveVirtRegs) {
+    MCPhysReg PhysReg = LR.PhysReg;
+    if (PhysReg == 0)
       continue;
-    if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+
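+    // Physregs marked regLiveIn above already carry their value into this
+    // block, so any virtreg still mapped to them needs no reload here.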
+    unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+    if (RegUnitStates[FirstUnit] == regLiveIn)
       continue;
-    spillVirtReg(MI, LR);
+
+    assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) &&
+           "no reload in start block. Missing vreg def?");
+
+    if (PrologLiveIns.count(PhysReg)) {
+      // FIXME: Theoretically this should use an insert point skipping labels
+      // but I'm not sure how labels should interact with prolog instructions
+      // that need reloads.
+      reload(MBB.begin(), LR.VirtReg, PhysReg);
+    } else
+      reload(InsertBefore, LR.VirtReg, PhysReg);
   }
   LiveVirtRegs.clear();
 }
@@ -493,51 +501,74 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
 /// Handle the direct use of a physical register. Displace any virtreg that
 /// currently occupies the register, mark the register pre-assigned, and
 /// record it as used in this instruction.
-void RegAllocFast::usePhysReg(MachineOperand &MO) {
-  // Ignore undef uses.
-  if (MO.isUndef())
-    return;
-
-  Register PhysReg = MO.getReg();
-  assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
-
-  markRegUsedInInstr(PhysReg);
-
-  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
-    switch (RegUnitStates[*UI]) {
-    case regReserved:
-      RegUnitStates[*UI] = regFree;
-      LLVM_FALLTHROUGH;
-    case regFree:
-      break;
-    default:
-      llvm_unreachable("Unexpected reg unit state");
-    }
-  }
+bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+  assert(Register::isPhysicalRegister(Reg) && "expected physreg");
+  bool displacedAny = displacePhysReg(MI, Reg);
+  setPhysRegState(Reg, regPreAssigned);
+  markRegUsedInInstr(Reg);
+  return displacedAny;
+}
 
-  // All aliases are disabled, bring register into working set.
-  setPhysRegState(PhysReg, regFree);
-  MO.setIsKill();
+bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
+  bool displacedAny = displacePhysReg(MI, Reg);
+  setPhysRegState(Reg, regPreAssigned);
+  return displacedAny;
 }
 
 /// Displace any virtreg or pre-assigned value currently occupying PhysReg or
 /// one of its register units, inserting reloads for displaced virtregs after
 /// \p MI. \returns true if anything was displaced.
-void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
-                                 MCPhysReg PhysReg, unsigned NewState) {
+bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
+  bool displacedAny = false;
+
   for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
-    switch (unsigned VirtReg = RegUnitStates[*UI]) {
-    default:
-      spillVirtReg(MI, VirtReg);
+    unsigned Unit = *UI;
+    switch (unsigned VirtReg = RegUnitStates[Unit]) {
+    default: {
+      LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+      assert(LRI != LiveVirtRegs.end() && "datastructures in sync");
+      MachineBasicBlock::iterator ReloadBefore =
+          std::next((MachineBasicBlock::iterator)MI.getIterator());
+      reload(ReloadBefore, VirtReg, LRI->PhysReg);
+
+      setPhysRegState(LRI->PhysReg, regFree);
+      LRI->PhysReg = 0;
+      LRI->Reloaded = true;
+      displacedAny = true;
+      break;
+    }
+    case regPreAssigned:
+      RegUnitStates[Unit] = regFree;
+      displacedAny = true;
       break;
     case regFree:
-    case regReserved:
       break;
     }
   }
+  return displacedAny;
+}
 
-  markRegUsedInInstr(PhysReg);
-  setPhysRegState(PhysReg, NewState);
+void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
+  LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
+
+  unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+  switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
+  case regFree:
+    LLVM_DEBUG(dbgs() << '\n');
+    return;
+  case regPreAssigned:
+    LLVM_DEBUG(dbgs() << '\n');
+    setPhysRegState(PhysReg, regFree);
+    return;
+  default: {
+      LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+      assert(LRI != LiveVirtRegs.end());
+      LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n');
+      setPhysRegState(LRI->PhysReg, regFree);
+      LRI->PhysReg = 0;
+    }
+    return;
+  }
 }
 
 /// Return the cost of spilling to clear out PhysReg and aliases so it is free
@@ -545,35 +576,61 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
 /// disabled - it can be allocated directly.
 /// \returns spillImpossible when PhysReg or an alias can't be spilled.
 unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
-  if (isRegUsedInInstr(PhysReg)) {
-    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
-                      << " is already used in instr.\n");
-    return spillImpossible;
-  }
-
   for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
     switch (unsigned VirtReg = RegUnitStates[*UI]) {
     case regFree:
       break;
-    case regReserved:
-      LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
-                        << printReg(PhysReg, TRI) << " is reserved already.\n");
+    case regPreAssigned:
+      LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned "
+                        << printReg(PhysReg, TRI) << '\n');
       return spillImpossible;
     default: {
-      LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
-      assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
-             "Missing VirtReg entry");
-      return LRI->Dirty ? spillDirty : spillClean;
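+      // A virtreg that already has a stack slot or is live out of the block
+      // is bound to be stored to the stack anyway, so displacing it costs
+      // only the reload (spillClean) rather than a store plus a reload
+      // (spillDirty).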
+      bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 ||
+                       findLiveVirtReg(VirtReg)->LiveOut;
+      return SureSpill ? spillClean : spillDirty;
     }
     }
   }
   return 0;
 }
 
+void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition,
+                                             Register VirtReg, MCPhysReg Reg) {
+  auto UDBGValIter = DanglingDbgValues.find(VirtReg);
+  if (UDBGValIter == DanglingDbgValues.end())
+    return;
+
+  SmallVectorImpl<MachineInstr*> &Dangling = UDBGValIter->second;
+  for (MachineInstr *DbgValue : Dangling) {
+    assert(DbgValue->isDebugValue());
+    MachineOperand &MO = DbgValue->getOperand(0);
+    if (!MO.isReg())
+      continue;
+
+    // Test whether the physreg survives from the definition to the DBG_VALUE.
+    MCPhysReg SetToReg = Reg;
+    unsigned Limit = 20;
+    for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()),
+         E = DbgValue->getIterator(); I != E; ++I) {
+      if (I->modifiesRegister(Reg, TRI) || --Limit == 0) {
+        LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+                   << '\n');
+        SetToReg = 0;
+        break;
+      }
+    }
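+    // If the register was clobbered on the way (or the scan limit was hit),
+    // SetToReg is 0 ($noreg) and the DBG_VALUE reports the value as
+    // unavailable instead of pointing at a stale register.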
+    MO.setReg(SetToReg);
+    if (SetToReg != 0)
+      MO.setIsRenamable();
+  }
+  Dangling.clear();
+}
+
 /// This method updates local state so that we know that PhysReg is the
 /// proper container for VirtReg now.  The physical register must not be used
 /// for anything else when this is called.
-void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
+void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR,
+                                       MCPhysReg PhysReg) {
   Register VirtReg = LR.VirtReg;
   LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
                     << printReg(PhysReg, TRI) << '\n');
@@ -581,6 +638,8 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
   assert(PhysReg != 0 && "Trying to assign no register");
   LR.PhysReg = PhysReg;
   setPhysRegState(PhysReg, VirtReg);
+
+  assignDanglingDebugValues(AtMI, VirtReg, PhysReg);
 }
 
 static bool isCoalescable(const MachineInstr &MI) {
@@ -624,11 +683,10 @@ Register RegAllocFast::traceCopies(Register VirtReg) const {
 }
 
 /// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR,
+                                Register Hint0, bool LookAtPhysRegUses) {
   const Register VirtReg = LR.VirtReg;
-
-  assert(Register::isVirtualRegister(VirtReg) &&
-         "Can only allocate virtual registers");
+  assert(LR.PhysReg == 0);
 
   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
   LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
@@ -636,41 +694,36 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
                     << " with hint " << printReg(Hint0, TRI) << '\n');
 
   // Take hint when possible.
-  if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) &&
-      RC.contains(Hint0)) {
-    // Ignore the hint if we would have to spill a dirty register.
-    unsigned Cost = calcSpillCost(Hint0);
-    if (Cost < spillDirty) {
+  if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) &&
+      !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) {
+    // Take hint if the register is currently free.
+    if (isPhysRegFree(Hint0)) {
       LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
                         << '\n');
-      if (Cost)
-        definePhysReg(MI, Hint0, regFree);
-      assignVirtToPhysReg(LR, Hint0);
+      assignVirtToPhysReg(MI, LR, Hint0);
       return;
     } else {
-      LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
-                        << "occupied\n");
+      LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI)
+                        << " occupied\n");
     }
   } else {
     Hint0 = Register();
   }
 
+
   // Try other hint.
   Register Hint1 = traceCopies(VirtReg);
-  if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) &&
-      RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
-    // Ignore the hint if we would have to spill a dirty register.
-    unsigned Cost = calcSpillCost(Hint1);
-    if (Cost < spillDirty) {
+  if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+      !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) {
+    // Take hint if the register is currently free.
+    if (isPhysRegFree(Hint1)) {
       LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
-                        << '\n');
-      if (Cost)
-        definePhysReg(MI, Hint1, regFree);
-      assignVirtToPhysReg(LR, Hint1);
+                 << '\n');
+      assignVirtToPhysReg(MI, LR, Hint1);
       return;
     } else {
-      LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
-                        << "occupied\n");
+      LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI)
+                 << " occupied\n");
     }
   } else {
     Hint1 = Register();
@@ -681,15 +734,20 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
   ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
   for (MCPhysReg PhysReg : AllocationOrder) {
     LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
+    if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) {
+      LLVM_DEBUG(dbgs() << "already used in instr.\n");
+      continue;
+    }
+
     unsigned Cost = calcSpillCost(PhysReg);
     LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n');
     // Immediately take a register with cost 0.
     if (Cost == 0) {
-      assignVirtToPhysReg(LR, PhysReg);
+      assignVirtToPhysReg(MI, LR, PhysReg);
       return;
     }
 
-    if (PhysReg == Hint1 || PhysReg == Hint0)
+    if (PhysReg == Hint0 || PhysReg == Hint1)
       Cost -= spillPrefBonus;
 
     if (Cost < BestCost) {
@@ -705,13 +763,14 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
       MI.emitError("inline assembly requires more registers than available");
     else
       MI.emitError("ran out of registers during register allocation");
-    definePhysReg(MI, *AllocationOrder.begin(), regFree);
-    assignVirtToPhysReg(LR, *AllocationOrder.begin());
+
+    LR.Error = true;
+    LR.PhysReg = 0;
     return;
   }
 
-  definePhysReg(MI, BestReg, regFree);
-  assignVirtToPhysReg(LR, BestReg);
+  displacePhysReg(MI, BestReg);
+  assignVirtToPhysReg(MI, LR, BestReg);
 }
 
 void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
@@ -739,212 +798,166 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
   MO.setIsRenamable(true);
 }
 
-/// Allocates a register for VirtReg and mark it as dirty.
-MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
-                                      Register VirtReg, Register Hint) {
-  assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
+/// Variation of defineVirtReg() with special handling for livethrough regs
+/// (tied or earlyclobber) that may interfere with preassigned uses.
+void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+                                            Register VirtReg) {
+  LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+  if (LRI != LiveVirtRegs.end()) {
+    MCPhysReg PrevReg = LRI->PhysReg;
+    if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) {
+      LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI)
+                        << " (tied/earlyclobber resolution)\n");
+      freePhysReg(PrevReg);
+      LRI->PhysReg = 0;
+      allocVirtReg(MI, *LRI, 0, true);
+      MachineBasicBlock::iterator InsertBefore =
+        std::next((MachineBasicBlock::iterator)MI.getIterator());
+      LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to "
+                        << printReg(PrevReg, TRI) << '\n');
+      BuildMI(*MBB, InsertBefore, MI.getDebugLoc(),
+              TII->get(TargetOpcode::COPY), PrevReg)
+        .addReg(LRI->PhysReg, llvm::RegState::Kill);
+    }
+    MachineOperand &MO = MI.getOperand(OpNum);
+    if (MO.getSubReg() && !MO.isUndef()) {
+      LRI->LastUse = &MI;
+    }
+  }
+  return defineVirtReg(MI, OpNum, VirtReg, true);
+}
+
+/// Allocates a register for VirtReg definition. Typically the register is
+/// already assigned from a use of the virtreg; however, we still need to
+/// perform an allocation if:
+/// - It is a dead definition without any uses.
+/// - The value is live out and all uses are in different basic blocks.
+void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+                                 Register VirtReg, bool LookAtPhysRegUses) {
+  assert(VirtReg.isVirtual() && "Not a virtual register");
+  MachineOperand &MO = MI.getOperand(OpNum);
   LiveRegMap::iterator LRI;
   bool New;
   std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
-  if (!LRI->PhysReg) {
-    // If there is no hint, peek at the only use of this register.
-    if ((!Hint || !Hint.isPhysical()) &&
-        MRI->hasOneNonDBGUse(VirtReg)) {
-      const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
-      // It's a copy, use the destination register as a hint.
-      if (UseMI.isCopyLike())
-        Hint = UseMI.getOperand(0).getReg();
+  if (New) {
+    if (!MO.isDead()) {
+      if (mayLiveOut(VirtReg)) {
+        LRI->LiveOut = true;
+      } else {
+        // It is a dead def without the dead flag; add the flag now.
+        MO.setIsDead(true);
+      }
     }
-    allocVirtReg(MI, *LRI, Hint);
-  } else if (LRI->LastUse) {
-    // Redefining a live register - kill at the last use, unless it is this
-    // instruction defining VirtReg multiple times.
-    if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
-      addKillFlag(*LRI);
   }
-  assert(LRI->PhysReg && "Register not assigned");
-  LRI->LastUse = &MI;
-  LRI->LastOpNum = OpNum;
-  LRI->Dirty = true;
-  markRegUsedInInstr(LRI->PhysReg);
-  return LRI->PhysReg;
+  if (LRI->PhysReg == 0)
+    allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses);
+  else {
+    assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) &&
+           "TODO: preassign mismatch");
+    LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI)
+                      << " use existing assignment to "
+                      << printReg(LRI->PhysReg, TRI) << '\n');
+  }
+
+  MCPhysReg PhysReg = LRI->PhysReg;
+  assert(PhysReg != 0 && "Register not assigned");
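+  // In the backwards scan a def ends the live interval. If the value is
+  // still needed below this point (live out, or a reload for it was already
+  // inserted), store it to its stack slot right after the def; IMPLICIT_DEFs
+  // produce no real value and are skipped.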
+  if (LRI->Reloaded || LRI->LiveOut) {
+    if (!MI.isImplicitDef()) {
+      MachineBasicBlock::iterator SpillBefore =
+          std::next((MachineBasicBlock::iterator)MI.getIterator());
+      LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
+                        << LRI->Reloaded << '\n');
+      bool Kill = LRI->LastUse == nullptr;
+      spill(SpillBefore, VirtReg, PhysReg, Kill);
+      LRI->LastUse = nullptr;
+    }
+    LRI->LiveOut = false;
+    LRI->Reloaded = false;
+  }
+  markRegUsedInInstr(PhysReg);
+  setPhysReg(MI, MO, PhysReg);
 }
 
-/// Make sure VirtReg is available in a physreg and return it.
-RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
-                                                   unsigned OpNum,
-                                                   Register VirtReg,
-                                                   Register Hint) {
-  assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
+/// Allocates a register for a VirtReg use.
+void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+                              Register VirtReg) {
+  assert(VirtReg.isVirtual() && "Not a virtual register");
+  MachineOperand &MO = MI.getOperand(OpNum);
   LiveRegMap::iterator LRI;
   bool New;
   std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
-  MachineOperand &MO = MI.getOperand(OpNum);
-  if (!LRI->PhysReg) {
-    allocVirtReg(MI, *LRI, Hint);
-    reload(MI, VirtReg, LRI->PhysReg);
-  } else if (LRI->Dirty) {
-    if (isLastUseOfLocalReg(MO)) {
-      LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n');
-      if (MO.isUse())
-        MO.setIsKill();
-      else
-        MO.setIsDead();
-    } else if (MO.isKill()) {
-      LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n');
-      MO.setIsKill(false);
-    } else if (MO.isDead()) {
-      LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n');
-      MO.setIsDead(false);
+  if (New) {
+    MachineOperand &MO = MI.getOperand(OpNum);
+    if (!MO.isKill()) {
+      if (mayLiveOut(VirtReg)) {
+        LRI->LiveOut = true;
+      } else {
+        // It is a last (killing) use without the kill flag; add the flag now.
+        MO.setIsKill(true);
+      }
     }
-  } else if (MO.isKill()) {
-    // We must remove kill flags from uses of reloaded registers because the
-    // register would be killed immediately, and there might be a second use:
-    //   %foo = OR killed %x, %x
-    // This would cause a second reload of %x into a different register.
-    LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n');
-    MO.setIsKill(false);
-  } else if (MO.isDead()) {
-    LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n');
-    MO.setIsDead(false);
+  } else {
+    assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag");
   }
-  assert(LRI->PhysReg && "Register not assigned");
+
+  // If necessary allocate a register.
+  if (LRI->PhysReg == 0) {
+    assert(!MO.isTied() && "tied op should be allocated");
+    Register Hint;
+    if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
+      Hint = MI.getOperand(0).getReg();
+      assert(Hint.isPhysical() &&
+             "Copy destination should already be assigned");
+    }
+    allocVirtReg(MI, *LRI, Hint, false);
+    if (LRI->Error) {
+      const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+      ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+      setPhysReg(MI, MO, *AllocationOrder.begin());
+      return;
+    }
+  }
+
   LRI->LastUse = &MI;
-  LRI->LastOpNum = OpNum;
   markRegUsedInInstr(LRI->PhysReg);
-  return *LRI;
+  setPhysReg(MI, MO, LRI->PhysReg);
 }
 
 /// Changes operand OpNum in MI to refer to PhysReg, considering subregs. This
 /// may invalidate any operand pointers.
-bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
                               MCPhysReg PhysReg) {
-  bool Dead = MO.isDead();
   if (!MO.getSubReg()) {
     MO.setReg(PhysReg);
     MO.setIsRenamable(true);
-    return MO.isKill() || Dead;
+    return;
   }
 
   // Handle subregister index.
   MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register());
   MO.setIsRenamable(true);
-  MO.setSubReg(0);
+  // Note: We leave the subreg number around a little longer in case of defs.
+  // This is so that the register freeing logic in allocateInstruction can still
+  // recognize this as a subregister def. The code there will clear the number.
+  if (!MO.isDef())
+    MO.setSubReg(0);
 
   // A kill flag implies killing the full register. Add corresponding super
   // register kill.
   if (MO.isKill()) {
     MI.addRegisterKilled(PhysReg, TRI, true);
-    return true;
+    return;
   }
 
   // A <def,read-undef> of a sub-register requires an implicit def of the full
   // register.
-  if (MO.isDef() && MO.isUndef())
-    MI.addRegisterDefined(PhysReg, TRI);
-
-  return Dead;
-}
-
-// Handles special instruction operand like early clobbers and tied ops when
-// there are additional physreg defines.
-void RegAllocFast::handleThroughOperands(MachineInstr &MI,
-                                         SmallVectorImpl<Register> &VirtDead) {
-  LLVM_DEBUG(dbgs() << "Scanning for through registers:");
-  SmallSet<Register, 8> ThroughRegs;
-  for (const MachineOperand &MO : MI.operands()) {
-    if (!MO.isReg()) continue;
-    Register Reg = MO.getReg();
-    if (!Reg.isVirtual())
-      continue;
-    if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
-        (MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
-      if (ThroughRegs.insert(Reg).second)
-        LLVM_DEBUG(dbgs() << ' ' << printReg(Reg));
-    }
-  }
-
-  // If any physreg defines collide with preallocated through registers,
-  // we must spill and reallocate.
-  LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
-  for (const MachineOperand &MO : MI.operands()) {
-    if (!MO.isReg() || !MO.isDef()) continue;
-    Register Reg = MO.getReg();
-    if (!Reg || !Reg.isPhysical())
-      continue;
-    markRegUsedInInstr(Reg);
-
-    for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
-      if (!ThroughRegs.count(RegUnitStates[*UI]))
-        continue;
-
-      // Need to spill any aliasing registers.
-      for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
-        for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
-          definePhysReg(MI, *SI, regFree);
-        }
-      }
-    }
-  }
-
-  SmallVector<Register, 8> PartialDefs;
-  LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
-  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-    MachineOperand &MO = MI.getOperand(I);
-    if (!MO.isReg()) continue;
-    Register Reg = MO.getReg();
-    if (!Register::isVirtualRegister(Reg))
-      continue;
-    if (MO.isUse()) {
-      if (!MO.isTied()) continue;
-      LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
-                        << ") is tied to operand " << MI.findTiedOperandIdx(I)
-                        << ".\n");
-      LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
-      MCPhysReg PhysReg = LR.PhysReg;
-      setPhysReg(MI, MO, PhysReg);
-      // Note: we don't update the def operand yet. That would cause the normal
-      // def-scan to attempt spilling.
-    } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) {
-      LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n');
-      // Reload the register, but don't assign to the operand just yet.
-      // That would confuse the later phys-def processing pass.
-      LiveReg &LR = reloadVirtReg(MI, I, Reg, 0);
-      PartialDefs.push_back(LR.PhysReg);
-    }
-  }
-
-  LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n");
-  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
-    const MachineOperand &MO = MI.getOperand(I);
-    if (!MO.isReg()) continue;
-    Register Reg = MO.getReg();
-    if (!Register::isVirtualRegister(Reg))
-      continue;
-    if (!MO.isEarlyClobber())
-      continue;
-    // Note: defineVirtReg may invalidate MO.
-    MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0);
-    if (setPhysReg(MI, MI.getOperand(I), PhysReg))
-      VirtDead.push_back(Reg);
-  }
-
-  // Restore UsedInInstr to a state usable for allocating normal virtual uses.
-  UsedInInstr.clear();
-  for (const MachineOperand &MO : MI.operands()) {
-    if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
-    Register Reg = MO.getReg();
-    if (!Reg || !Reg.isPhysical())
-      continue;
-    LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
-                      << " as used in instr\n");
-    markRegUsedInInstr(Reg);
+  if (MO.isDef() && MO.isUndef()) {
+    if (MO.isDead())
+      MI.addRegisterDead(PhysReg, TRI, true);
+    else
+      MI.addRegisterDefined(PhysReg, TRI);
   }
-
-  // Also mark PartialDefs as used to avoid reallocation.
-  for (Register PartialDef : PartialDefs)
-    markRegUsedInInstr(PartialDef);
 }
 
 #ifndef NDEBUG
@@ -955,15 +968,21 @@ void RegAllocFast::dumpState() const {
     switch (unsigned VirtReg = RegUnitStates[Unit]) {
     case regFree:
       break;
-    case regReserved:
+    case regPreAssigned:
       dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
       break;
+    case regLiveIn:
+      llvm_unreachable("Should not have regLiveIn in map");
     default: {
       dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
       LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
       assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
-      if (I->Dirty)
-        dbgs() << "[D]";
+      if (I->LiveOut || I->Reloaded) {
+        dbgs() << '[';
+        if (I->LiveOut) dbgs() << 'O';
+        if (I->Reloaded) dbgs() << 'R';
+        dbgs() << ']';
+      }
       assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
       break;
     }
@@ -986,111 +1005,277 @@ void RegAllocFast::dumpState() const {
 }
 #endif
 
-void RegAllocFast::allocateInstruction(MachineInstr &MI) {
-  const MCInstrDesc &MCID = MI.getDesc();
-
-  // If this is a copy, we may be able to coalesce.
-  Register CopySrcReg;
-  Register CopyDstReg;
-  unsigned CopySrcSub = 0;
-  unsigned CopyDstSub = 0;
-  if (MI.isCopy()) {
-    CopyDstReg = MI.getOperand(0).getReg();
-    CopySrcReg = MI.getOperand(1).getReg();
-    CopyDstSub = MI.getOperand(0).getSubReg();
-    CopySrcSub = MI.getOperand(1).getSubReg();
+/// Count number of defs consumed from each register class by \p Reg
+void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
+                                        Register Reg) const {
+  assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
+
+  if (Reg.isVirtual()) {
+    const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
+    for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+         RCIdx != RCIdxEnd; ++RCIdx) {
+      const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+      // FIXME: Consider aliasing sub/super registers.
+      if (OpRC->hasSubClassEq(IdxRC))
+        ++RegClassDefCounts[RCIdx];
+    }
+
+    return;
   }
 
-  // Track registers used by instruction.
+  for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
+       RCIdx != RCIdxEnd; ++RCIdx) {
+    const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx);
+    for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
+      if (IdxRC->contains(*Alias)) {
+        ++RegClassDefCounts[RCIdx];
+        break;
+      }
+    }
+  }
+}
+
+void RegAllocFast::allocateInstruction(MachineInstr &MI) {
+  // The basic algorithm here is:
+  // 1. Mark registers of def operands as free
+  // 2. Allocate registers to use operands and place reload instructions for
+  //    registers displaced by the allocation.
+  //
+  // However we need to handle some corner cases:
+  // - pre-assigned defs and uses need to be handled before the other def/use
+  //   operands are processed to avoid the allocation heuristics clashing with
+  //   the pre-assignment.
+  // - The "free def operands" step has to come last instead of first for tied
+  //   operands and early-clobbers.
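+  //
+  // Illustrative example (pseudo MIR; SOME_OP is a made-up opcode):
+  //   %2 = SOME_OP killed %0, killed %1
+  // Scanning backwards, the physreg held by %2 is freed at this def and the
+  // uses %0 and %1 are then assigned registers, possibly reusing the one
+  // that %2 just vacated.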
+
   UsedInInstr.clear();
 
-  // First scan.
-  // Mark physreg uses and early clobbers as used.
-  // Find the end of the virtreg operands
-  unsigned VirtOpEnd = 0;
-  bool hasTiedOps = false;
-  bool hasEarlyClobbers = false;
-  bool hasPartialRedefs = false;
-  bool hasPhysDefs = false;
-  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI.getOperand(i);
-    // Make sure MRI knows about registers clobbered by regmasks.
-    if (MO.isRegMask()) {
-      MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
-      continue;
+  // Scan for special cases; apply pre-assigned register defs to the state.
+  bool HasPhysRegUse = false;
+  bool HasRegMask = false;
+  bool HasVRegDef = false;
+  bool HasDef = false;
+  bool HasEarlyClobber = false;
+  bool NeedToAssignLiveThroughs = false;
+  for (MachineOperand &MO : MI.operands()) {
+    if (MO.isReg()) {
+      Register Reg = MO.getReg();
+      if (Reg.isVirtual()) {
+        if (MO.isDef()) {
+          HasDef = true;
+          HasVRegDef = true;
+          if (MO.isEarlyClobber()) {
+            HasEarlyClobber = true;
+            NeedToAssignLiveThroughs = true;
+          }
+          if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef()))
+            NeedToAssignLiveThroughs = true;
+        }
+      } else if (Reg.isPhysical()) {
+        if (!MRI->isReserved(Reg)) {
+          if (MO.isDef()) {
+            HasDef = true;
+            bool displacedAny = definePhysReg(MI, Reg);
+            if (MO.isEarlyClobber())
+              HasEarlyClobber = true;
+            if (!displacedAny)
+              MO.setIsDead(true);
+          }
+          if (MO.readsReg())
+            HasPhysRegUse = true;
+        }
+      }
+    } else if (MO.isRegMask()) {
+      HasRegMask = true;
     }
-    if (!MO.isReg()) continue;
-    Register Reg = MO.getReg();
-    if (!Reg) continue;
-    if (Register::isVirtualRegister(Reg)) {
-      VirtOpEnd = i+1;
-      if (MO.isUse()) {
-        hasTiedOps = hasTiedOps ||
-                            MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
+  }
+
+  // Allocate virtreg defs.
+  if (HasDef) {
+    if (HasVRegDef) {
+      // Special handling for early clobbers, tied operands or subregister defs:
+      // Compared to "normal" defs these:
+      // - Must not use a register that is pre-assigned for a use operand.
+      // - In order to solve tricky inline assembly constraints we change the
+      //   heuristic to figure out a good operand order before doing
+      //   assignments.
+      if (NeedToAssignLiveThroughs) {
+        DefOperandIndexes.clear();
+        PhysRegUses.clear();
+
+        // Track number of defs which may consume a register from the class.
+        std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+        assert(RegClassDefCounts[0] == 0);
+
+        LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
+        for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+          const MachineOperand &MO = MI.getOperand(I);
+          if (!MO.isReg())
+            continue;
+          Register Reg = MO.getReg();
+          if (MO.readsReg()) {
+            if (Reg.isPhysical()) {
+              LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI)
+                                << '\n');
+              markPhysRegUsedInInstr(Reg);
+            }
+          }
+
+          if (MO.isDef()) {
+            if (Reg.isVirtual())
+              DefOperandIndexes.push_back(I);
+
+            addRegClassDefCounts(RegClassDefCounts, Reg);
+          }
+        }
+
+        llvm::sort(DefOperandIndexes.begin(), DefOperandIndexes.end(),
+                   [&](uint16_t I0, uint16_t I1) {
+          const MachineOperand &MO0 = MI.getOperand(I0);
+          const MachineOperand &MO1 = MI.getOperand(I1);
+          Register Reg0 = MO0.getReg();
+          Register Reg1 = MO1.getReg();
+          const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
+          const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
+
+          // Identify register classes that are easy to use up completely
+          // in just this one instruction.
+          unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
+          unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
+
+          bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
+          bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
+          if (SmallClass0 > SmallClass1)
+            return true;
+          if (SmallClass0 < SmallClass1)
+            return false;
+
+          // Allocate early clobbers and livethrough operands first.
+          bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
+                              (MO0.getSubReg() == 0 && !MO0.isUndef());
+          bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
+                              (MO1.getSubReg() == 0 && !MO1.isUndef());
+          if (Livethrough0 > Livethrough1)
+            return true;
+          if (Livethrough0 < Livethrough1)
+            return false;
+
+          // Tie-break rule: operand index.
+          return I0 < I1;
+        });
+
+        for (uint16_t OpIdx : DefOperandIndexes) {
+          MachineOperand &MO = MI.getOperand(OpIdx);
+          LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
+          unsigned Reg = MO.getReg();
+          if (MO.isEarlyClobber() || MO.isTied() ||
+              (MO.getSubReg() && !MO.isUndef())) {
+            defineLiveThroughVirtReg(MI, OpIdx, Reg);
+          } else {
+            defineVirtReg(MI, OpIdx, Reg);
+          }
+        }
       } else {
-        if (MO.isEarlyClobber())
-          hasEarlyClobbers = true;
-        if (MO.getSubReg() && MI.readsVirtualRegister(Reg))
-          hasPartialRedefs = true;
+        // Assign virtual register defs.
+        for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+          MachineOperand &MO = MI.getOperand(I);
+          if (!MO.isReg() || !MO.isDef())
+            continue;
+          Register Reg = MO.getReg();
+          if (Reg.isVirtual())
+            defineVirtReg(MI, I, Reg);
+        }
       }
-      continue;
     }
-    if (!MRI->isAllocatable(Reg)) continue;
-    if (MO.isUse()) {
-      usePhysReg(MO);
-    } else if (MO.isEarlyClobber()) {
-      definePhysReg(MI, Reg,
-                    (MO.isImplicit() || MO.isDead()) ? regFree : regReserved);
-      hasEarlyClobbers = true;
-    } else
-      hasPhysDefs = true;
+
+    // Free registers occupied by defs.
+    // Iterate operands in reverse order, so we see the implicit super register
+    // defs first (we added them earlier in case of <def,read-undef>).
+    for (unsigned I = MI.getNumOperands(); I-- > 0;) {
+      MachineOperand &MO = MI.getOperand(I);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+
+      // subreg defs don't free the full register. We left the subreg number
+      // around as a marker in setPhysReg() to recognize this case here.
+      if (MO.getSubReg() != 0) {
+        MO.setSubReg(0);
+        continue;
+      }
+
+      // Do not free tied operands and early clobbers.
+      if (MO.isTied() || MO.isEarlyClobber())
+        continue;
+      Register Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      assert(Reg.isPhysical());
+      if (MRI->isReserved(Reg))
+        continue;
+      freePhysReg(Reg);
+      unmarkRegUsedInInstr(Reg);
+    }
   }
 
-  // The instruction may have virtual register operands that must be allocated
-  // the same register at use-time and def-time: early clobbers and tied
-  // operands. If there are also physical defs, these registers must avoid
-  // both physical defs and uses, making them more constrained than normal
-  // operands.
-  // Similarly, if there are multiple defs and tied operands, we must make
-  // sure the same register is allocated to uses and defs.
-  // We didn't detect inline asm tied operands above, so just make this extra
-  // pass for all inline asm.
-  if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
-      (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
-    handleThroughOperands(MI, VirtDead);
-    // Don't attempt coalescing when we have funny stuff going on.
-    CopyDstReg = Register();
-    // Pretend we have early clobbers so the use operands get marked below.
-    // This is not necessary for the common case of a single tied use.
-    hasEarlyClobbers = true;
+  // Displace clobbered registers.
+  if (HasRegMask) {
+    for (const MachineOperand &MO : MI.operands()) {
+      if (MO.isRegMask()) {
+        // MRI bookkeeping.
+        MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
+        // Displace clobbered registers.
+        const uint32_t *Mask = MO.getRegMask();
+        for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(),
+             LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) {
+          MCPhysReg PhysReg = LRI->PhysReg;
+          if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg))
+            displacePhysReg(MI, PhysReg);
+        }
+      }
+    }
+  }
+
+  // Apply pre-assigned register uses to state.
+  if (HasPhysRegUse) {
+    for (MachineOperand &MO : MI.operands()) {
+      if (!MO.isReg() || !MO.readsReg())
+        continue;
+      Register Reg = MO.getReg();
+      if (!Reg.isPhysical())
+        continue;
+      if (MRI->isReserved(Reg))
+        continue;
+      bool displacedAny = usePhysReg(MI, Reg);
+      if (!displacedAny && !MRI->isReserved(Reg))
+        MO.setIsKill(true);
+    }
   }
 
-  // Second scan.
-  // Allocate virtreg uses.
+  // Allocate virtreg uses and insert reloads as necessary.
   bool HasUndefUse = false;
-  for (unsigned I = 0; I != VirtOpEnd; ++I) {
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
     MachineOperand &MO = MI.getOperand(I);
-    if (!MO.isReg()) continue;
+    if (!MO.isReg() || !MO.isUse())
+      continue;
     Register Reg = MO.getReg();
     if (!Reg.isVirtual())
       continue;
-    if (MO.isUse()) {
-      if (MO.isUndef()) {
-        HasUndefUse = true;
-        // There is no need to allocate a register for an undef use.
-        continue;
-      }
 
-      // Populate MayLiveAcrossBlocks in case the use block is allocated before
-      // the def block (removing the vreg uses).
-      mayLiveIn(Reg);
-
-      LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
-      MCPhysReg PhysReg = LR.PhysReg;
-      CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
-      if (setPhysReg(MI, MO, PhysReg))
-        killVirtReg(LR);
+    if (MO.isUndef()) {
+      HasUndefUse = true;
+      continue;
     }
+
+    // Populate MayLiveAcrossBlocks in case the use block is allocated before
+    // the def block (removing the vreg uses).
+    mayLiveIn(Reg);
+
+    assert(!MO.isInternalRead() && "Bundles not supported");
+    assert(MO.readsReg() && "reading use");
+    useVirtReg(MI, I, Reg);
   }
 
   // Allocate undef operands. This is a separate step because in a situation
@@ -1109,76 +1294,40 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     }
   }
 
-  // Track registers defined by instruction - early clobbers and tied uses at
-  // this point.
-  UsedInInstr.clear();
-  if (hasEarlyClobbers) {
-    for (const MachineOperand &MO : MI.operands()) {
-      if (!MO.isReg()) continue;
-      Register Reg = MO.getReg();
-      if (!Reg || !Reg.isPhysical())
+  // Free early clobbers.
+  if (HasEarlyClobber) {
+    for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
+      MachineOperand &MO = MI.getOperand(I);
+      if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
         continue;
-      // Look for physreg defs and tied uses.
-      if (!MO.isDef() && !MO.isTied()) continue;
-      markRegUsedInInstr(Reg);
-    }
-  }
-
-  unsigned DefOpEnd = MI.getNumOperands();
-  if (MI.isCall()) {
-    // Spill all virtregs before a call. This serves one purpose: If an
-    // exception is thrown, the landing pad is going to expect to find
-    // registers in their spill slots.
-    // Note: although this is appealing to just consider all definitions
-    // as call-clobbered, this is not correct because some of those
-    // definitions may be used later on and we do not want to reuse
-    // those for virtual registers in between.
-    LLVM_DEBUG(dbgs() << "  Spilling remaining registers before call.\n");
-    spillAll(MI, /*OnlyLiveOut*/ false);
-  }
-
-  // Third scan.
-  // Mark all physreg defs as used before allocating virtreg defs.
-  for (unsigned I = 0; I != DefOpEnd; ++I) {
-    const MachineOperand &MO = MI.getOperand(I);
-    if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
-      continue;
-    Register Reg = MO.getReg();
-
-    if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg))
-      continue;
-    definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
-  }
+      // subreg defs don't free the full register. We left the subreg number
+      // around as a marker in setPhysReg() to recognize this case here.
+      if (MO.getSubReg() != 0) {
+        MO.setSubReg(0);
+        continue;
+      }
 
-  // Fourth scan.
-  // Allocate defs and collect dead defs.
-  for (unsigned I = 0; I != DefOpEnd; ++I) {
-    const MachineOperand &MO = MI.getOperand(I);
-    if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
-      continue;
-    Register Reg = MO.getReg();
+      Register Reg = MO.getReg();
+      if (!Reg)
+        continue;
+      assert(Reg.isPhysical() && "should have register assigned");
+
+      // We sometimes get odd situations like:
+      //    early-clobber %x0 = INSTRUCTION %x0
+      // which is semantically questionable as the early-clobber should
+      // apply before the use. But in practice we consider the use to
+      // happen before the early clobber now. Don't free the early clobber
+      // register in this case.
+      if (MI.readsRegister(Reg, TRI))
+        continue;
 
-    // We have already dealt with phys regs in the previous scan.
-    if (Reg.isPhysical())
-      continue;
-    MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
-    if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
-      VirtDead.push_back(Reg);
-      CopyDstReg = Register(); // cancel coalescing;
-    } else
-      CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
+      freePhysReg(Reg);
+    }
   }
 
-  // Kill dead defs after the scan to ensure that multiple defs of the same
-  // register are allocated identically. We didn't need to do this for uses
-  // because we are creating our own kill flags, and they are always at the last
-  // use.
-  for (Register VirtReg : VirtDead)
-    killVirtReg(VirtReg);
-  VirtDead.clear();
-
   LLVM_DEBUG(dbgs() << "<< " << MI);
-  if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) {
+  if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
+      MI.getNumOperands() == 2) {
     LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n");
     Coalesced.push_back(&MI);
   }
@@ -1195,23 +1344,22 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
   if (!Register::isVirtualRegister(Reg))
     return;
 
+  // Already spilled to a stackslot?
+  int SS = StackSlotForVirtReg[Reg];
+  if (SS != -1) {
+    // Modify DBG_VALUE now that the value is in a spill slot.
+    updateDbgValueForSpill(MI, SS);
+    LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI);
+    return;
+  }
+
   // See if this virtual register has already been allocated to a physical
   // register or spilled to a stack slot.
   LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
   if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
     setPhysReg(MI, MO, LRI->PhysReg);
   } else {
-    int SS = StackSlotForVirtReg[Reg];
-    if (SS != -1) {
-      // Modify DBG_VALUE now that the value is in a spill slot.
-      updateDbgValueForSpill(MI, SS);
-      LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI);
-      return;
-    }
-
-    // We can't allocate a physreg for a DebugValue, sorry!
-    LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
-    MO.setReg(Register());
+    DanglingDbgValues[Reg].push_back(&MI);
   }
 
   // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
@@ -1219,6 +1367,17 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
   LiveDbgValueMap[Reg].push_back(&MI);
 }
 
+#ifndef NDEBUG
+bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
+  for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
+    if (RegUnitStates[*UI] != LR.VirtReg)
+      return false;
+  }
+
+  return true;
+}
+#endif
+
 void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
   this->MBB = &MBB;
   LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -1226,18 +1385,15 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
   RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
   assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
 
-  MachineBasicBlock::iterator MII = MBB.begin();
-
-  // Add live-in registers as live.
-  for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins())
-    if (MRI->isAllocatable(LI.PhysReg))
-      definePhysReg(MII, LI.PhysReg, regReserved);
+  for (MachineBasicBlock *Succ : MBB.successors()) {
+    for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins())
+      setPhysRegState(LI.PhysReg, regPreAssigned);
+  }
 
-  VirtDead.clear();
   Coalesced.clear();
 
-  // Otherwise, sequentially allocate each instruction in the MBB.
-  for (MachineInstr &MI : MBB) {
+  // Traverse block in reverse order allocating instructions one by one.
+  for (MachineInstr &MI : reverse(MBB)) {
     LLVM_DEBUG(
       dbgs() << "\n>> " << MI << "Regs:";
       dumpState()
@@ -1253,9 +1409,14 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
     allocateInstruction(MI);
   }
 
+  LLVM_DEBUG(
+    dbgs() << "Begin Regs:";
+    dumpState()
+  );
+
   // Reload the virtual registers live at the beginning of the block.
-  LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
-  spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
+  LLVM_DEBUG(dbgs() << "Loading live registers at beginning of block.\n");
+  reloadAtBegin(MBB);
 
   // Erase all the coalesced copies. We are delaying it until now because
   // LiveVirtRegs might refer to the instrs.
@@ -1263,6 +1424,20 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
     MBB.erase(MI);
   NumCoalesced += Coalesced.size();
 
+  for (auto &UDBGPair : DanglingDbgValues) {
+    for (MachineInstr *DbgValue : UDBGPair.second) {
+      assert(DbgValue->isDebugValue() && "expected DBG_VALUE");
+      MachineOperand &MO = DbgValue->getOperand(0);
+      // Nothing to do if the vreg was spilled in the meantime.
+      if (!MO.isReg())
+        continue;
+      LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue
+                 << '\n');
+      MO.setReg(0);
+    }
+  }
+  DanglingDbgValues.clear();
+
   LLVM_DEBUG(MBB.dump());
 }
 
@@ -1276,8 +1451,11 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
   MFI = &MF.getFrameInfo();
   MRI->freezeReservedRegs(MF);
   RegClassInfo.runOnMachineFunction(MF);
+  unsigned NumRegUnits = TRI->getNumRegUnits();
   UsedInInstr.clear();
-  UsedInInstr.setUniverse(TRI->getNumRegUnits());
+  UsedInInstr.setUniverse(NumRegUnits);
+  PhysRegUses.clear();
+  PhysRegUses.setUniverse(NumRegUnits);
 
   // initialize the virtual->physical register map to have a 'null'
   // mapping for all virtual registers

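A note on the def-operand ordering heuristic above: the comparator consults
only class pressure, livethrough-ness, and operand index, so it can be
exercised in isolation. The following is a minimal standalone sketch of the
same three-step ordering; it is not part of the patch, and OpInfo plus the
fixed class sizes are invented stand-ins for the MachineOperand and
RegClassInfo queries:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Invented stand-in for the MachineOperand/RegClassInfo queries used above.
struct OpInfo {
  unsigned RegClassID;  // register class of this def
  unsigned ClassSize;   // allocatable registers in that class
  bool EarlyClobber;
  bool Tied;
  unsigned SubReg;
  bool Undef;
};

int main() {
  // One hypothetical instruction with three defs. Class 0 is large (30
  // registers); class 1 has a single register but two defs, so this one
  // instruction can use it up completely (cf. addRegClassDefCounts).
  std::vector<OpInfo> Ops = {
      {0, 30, true, false, 0, false},  // early-clobber def, large class
      {1, 1, false, false, 0, false},  // plain def, tiny class
      {1, 1, false, false, 0, false},  // plain def, tiny class
  };
  std::vector<unsigned> RegClassDefCounts = {1, 2};

  std::vector<uint16_t> Idx = {0, 1, 2};
  std::sort(Idx.begin(), Idx.end(), [&](uint16_t I0, uint16_t I1) {
    const OpInfo &O0 = Ops[I0], &O1 = Ops[I1];
    // 1. Defs of a class this instruction can exhaust are assigned first.
    bool Small0 = O0.ClassSize < RegClassDefCounts[O0.RegClassID];
    bool Small1 = O1.ClassSize < RegClassDefCounts[O1.RegClassID];
    if (Small0 != Small1)
      return Small0;
    // 2. Then early clobbers and livethrough operands.
    bool Live0 = O0.EarlyClobber || O0.Tied || (O0.SubReg == 0 && !O0.Undef);
    bool Live1 = O1.EarlyClobber || O1.Tied || (O1.SubReg == 0 && !O1.Undef);
    if (Live0 != Live1)
      return Live0;
    // 3. Operand index as a deterministic tie-break.
    return I0 < I1;
  });

  for (uint16_t I : Idx)
    std::printf("assign def operand %u\n", unsigned(I));  // prints 1, 2, 0
  return 0;
}
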
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
index cbeac5d85fc4..296795b32761 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
@@ -31,9 +31,8 @@ target triple = "arm64-apple-ios13.0.0"
 ; This test checks that we don't re-use the register for the variable descriptor
 ; for the second ldr.
 ; CHECK:        adrp	x[[PTR1:[0-9]+]], _t_val@TLVPPAGE
-; CHECK:	ldr	x[[PTR1]], [x[[PTR1]], _t_val@TLVPPAGEOFF]
-; CHECK:	ldr	x[[FPTR:[0-9]+]], [x[[PTR1]]]
-; CHECK:        mov	x0, x[[PTR1]]
+; CHECK:	ldr	x0, [x[[PTR1]], _t_val@TLVPPAGEOFF]
+; CHECK:	ldr	x[[FPTR:[0-9]+]], [x0]
 ; CHECK:        blr     x[[FPTR]]
 
 define void @_Z4funcPKc(i8* %id) {

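Much of the CHECK churn in the tests that follow falls out of the new
bottom-up walk: the allocator visits each block in reverse, frees a def's
register when it reaches the def, assigns a register when it first meets a
use (the last use in program order), and whatever is still live at the top
of the block is reloaded there by reloadAtBegin. A toy model of that walk,
assuming no spilling is needed inside the block (Inst, FreePhys, and
Assigned are invented names, not the pass's data structures):

#include <cstdio>
#include <map>
#include <vector>

// A vastly simplified "instruction": just lists of virtual register numbers.
struct Inst {
  std::vector<int> Defs;
  std::vector<int> Uses;
};

int main() {
  // A block that computes v9 from v8 (defined in a predecessor), then uses v9.
  std::vector<Inst> Block = {
      {{9}, {8}},  // v9 = op v8
      {{}, {9}},   // use v9
  };
  std::vector<int> FreePhys = {1, 0};  // toy register pool: p0, p1
  std::map<int, int> Assigned;         // vreg -> physreg, i.e. the live set

  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    // 1. Defs first: above its def a value is dead, so, walking upward, the
    //    physical register becomes available again.
    for (int V : It->Defs) {
      auto A = Assigned.find(V);
      if (A != Assigned.end()) {
        FreePhys.push_back(A->second);
        Assigned.erase(A);
      }
    }
    // 2. Uses extend liveness upward: a use without a register gets one here
    //    (the real pass spills something when the pool is empty).
    for (int V : It->Uses) {
      if (!Assigned.count(V)) {
        Assigned[V] = FreePhys.back();
        FreePhys.pop_back();
      }
    }
  }

  // Whatever is still assigned is live into the block and must be reloaded
  // at its top, as reloadAtBegin does in the patch.
  for (const auto &[V, P] : Assigned)
    std::printf("reload v%d into p%d at block begin\n", V, P);
  return 0;
}

Run as-is this prints "reload v8 into p0 at block begin": spills now sit at
defs and reloads at block begin, which is why spill-slot numbers and
load/store placements move in so many of the checks below.
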
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index 805ba09bace2..d563ccb851ce 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -94,7 +94,7 @@ entry:
   store i32 %c, i32* %c.addr, align 4
   store i64 %d, i64* %d.addr, align 8
   %0 = load i16, i16* %b.addr, align 2
-; CHECK: tbz w8, #0, LBB4_2
+; CHECK: tbz {{w[0-9]+}}, #0, LBB4_2
   %conv = trunc i16 %0 to i1
   br i1 %conv, label %if.then, label %if.end
 

diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
index 6b5799bdefd9..586b7d116f5c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -79,8 +79,7 @@ declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8
 define i32 @t2() {
 entry:
 ; CHECK-LABEL: t2
-; CHECK:       mov [[REG1:x[0-9]+]], xzr
-; CHECK:       mov x0, [[REG1]]
+; CHECK:       mov x0, xzr
 ; CHECK:       mov w1, #-8
 ; CHECK:       mov [[REG2:w[0-9]+]], #1023
 ; CHECK:       uxth w2, [[REG2]]

diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
index 7c546936ba27..b3c073f53542 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll
@@ -4,9 +4,8 @@
 define i32 @fptosi_wh(half %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: fptosi_wh
-; CHECK: fcvt s0, h0
-; CHECK: fcvtzs [[REG:w[0-9]+]], s0
-; CHECK: mov w0, [[REG]]
+; CHECK: fcvt [[REG:s[0-9]+]], h0
+; CHECK: fcvtzs w0, [[REG]]
   %conv = fptosi half %a to i32
   ret i32 %conv
 }
@@ -15,9 +14,8 @@ entry:
 define i32 @fptoui_swh(half %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: fptoui_swh
-; CHECK: fcvt s0, h0
-; CHECK: fcvtzu [[REG:w[0-9]+]], s0
-; CHECK: mov w0, [[REG]]
+; CHECK: fcvt [[REG:s[0-9]+]], h0
+; CHECK: fcvtzu w0, [[REG]]
   %conv = fptoui half %a to i32
   ret i32 %conv
 }
@@ -26,8 +24,8 @@ entry:
 define half @sitofp_hw_i1(i1 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sitofp_hw_i1
-; CHECK: sbfx w8, w0, #0, #1
-; CHECK: scvtf s0, w8
+; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
+; CHECK: scvtf s0, [[REG]]
 ; CHECK: fcvt  h0, s0
   %conv = sitofp i1 %a to half
   ret half %conv
@@ -37,8 +35,8 @@ entry:
 define half @sitofp_hw_i8(i8 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sitofp_hw_i8
-; CHECK: sxtb w8, w0
-; CHECK: scvtf s0, w8
+; CHECK: sxtb [[REG:w[0-9]+]], w0
+; CHECK: scvtf s0, [[REG]]
 ; CHECK: fcvt  h0, s0
   %conv = sitofp i8 %a to half
   ret half %conv
@@ -48,8 +46,8 @@ entry:
 define half @sitofp_hw_i16(i16 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sitofp_hw_i16
-; CHECK: sxth w8, w0
-; CHECK: scvtf s0, w8
+; CHECK: sxth [[REG:w[0-9]+]], w0
+; CHECK: scvtf s0, [[REG]]
 ; CHECK: fcvt  h0, s0
   %conv = sitofp i16 %a to half
   ret half %conv
@@ -79,8 +77,8 @@ entry:
 define half @uitofp_hw_i1(i1 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: uitofp_hw_i1
-; CHECK: and w8, w0, #0x1
-; CHECK: ucvtf s0, w8
+; CHECK: and [[REG:w[0-9]+]], w0, #0x1
+; CHECK: ucvtf s0, [[REG]]
 ; CHECK: fcvt  h0, s0
   %conv = uitofp i1 %a to half
   ret half %conv
@@ -90,8 +88,8 @@ entry:
 define half @uitofp_hw_i8(i8 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: uitofp_hw_i8
-; CHECK: and w8, w0, #0xff
-; CHECK: ucvtf s0, w8
+; CHECK: and [[REG:w[0-9]+]], w0, #0xff
+; CHECK: ucvtf s0, [[REG]]
 ; CHECK: fcvt  h0, s0
   %conv = uitofp i8 %a to half
   ret half %conv
@@ -101,8 +99,8 @@ entry:
 define half @uitofp_hw_i16(i16 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: uitofp_hw_i16
-; CHECK: and w8, w0, #0xffff
-; CHECK: ucvtf s0, w8
+; CHECK: and [[REG:w[0-9]+]], w0, #0xffff
+; CHECK: ucvtf s0, [[REG]]
 ; CHECK: fcvt  h0, s0
   %conv = uitofp i16 %a to half
   ret half %conv

diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index d8abf14c1366..26ce3a3b94aa 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck -enable-var-scope %s
 
 ;; Test various conversions.
 define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
@@ -49,13 +49,12 @@ entry:
 ; CHECK: strh w1, [sp, #12]
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
-; CHECK: ldrb w8, [sp, #15]
-; CHECK: strh w8, [sp, #12]
-; CHECK: ldrh w8, [sp, #12]
-; CHECK: str w8, [sp, #8]
-; CHECK: ldr w8, [sp, #8]
-; CHECK: ; kill: def $x8 killed $w8
-; CHECK: str x8, [sp]
+; CHECK: ldrb [[REG0:w[0-9]+]], [sp, #15]
+; CHECK: strh [[REG0]], [sp, #12]
+; CHECK: ldrh [[REG1:w[0-9]+]], [sp, #12]
+; CHECK: str [[REG1]], [sp, #8]
+; CHECK: ldr w[[REG2:[0-9]+]], [sp, #8]
+; CHECK: str x[[REG2]], [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
   %a.addr = alloca i8, align 1
@@ -105,12 +104,12 @@ entry:
 ; CHECK: strh w1, [sp, #12]
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
-; CHECK: ldrsb w8, [sp, #15]
-; CHECK: strh w8, [sp, #12]
-; CHECK: ldrsh w8, [sp, #12]
-; CHECK: str w8, [sp, #8]
-; CHECK: ldrsw x8, [sp, #8]
-; CHECK: str x8, [sp]
+; CHECK: ldrsb [[REG0:w[0-9]+]], [sp, #15]
+; CHECK: strh [[REG0]], [sp, #12]
+; CHECK: ldrsh [[REG1:w[0-9]+]], [sp, #12]
+; CHECK: str [[REG1]], [sp, #8]
+; CHECK: ldrsw [[REG2:x[0-9]+]], [sp, #8]
+; CHECK: str [[REG2]], [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
   %a.addr = alloca i8, align 1
@@ -166,8 +165,8 @@ entry:
 define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sext_i1_i16
-; CHECK: sbfx w8, w0, #0, #1
-; CHECK-NEXT: sxth	w0, w8
+; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
+; CHECK: sxth w0, [[REG]]
   %conv = sext i1 %a to i16
   ret i16 %conv
 }
@@ -176,8 +175,8 @@ entry:
 define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sext_i1_i8
-; CHECK: sbfx w8, w0, #0, #1
-; CHECK-NEXT: sxtb w0, w8
+; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
+; CHECK: sxtb w0, [[REG]]
   %conv = sext i1 %a to i8
   ret i8 %conv
 }
@@ -240,8 +239,8 @@ entry:
 define float @sitofp_sw_i1(i1 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sitofp_sw_i1
-; CHECK: sbfx w8, w0, #0, #1
-; CHECK: scvtf s0, w8
+; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1
+; CHECK: scvtf s0, [[REG]]
   %conv = sitofp i1 %a to float
   ret float %conv
 }
@@ -250,8 +249,8 @@ entry:
 define float @sitofp_sw_i8(i8 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: sitofp_sw_i8
-; CHECK: sxtb w8, w0
-; CHECK: scvtf s0, w8
+; CHECK: sxtb [[REG:w[0-9]+]], w0
+; CHECK: scvtf s0, [[REG]]
   %conv = sitofp i8 %a to float
   ret float %conv
 }
@@ -304,8 +303,8 @@ entry:
 define float @uitofp_sw_i1(i1 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: uitofp_sw_i1
-; CHECK: and w8, w0, #0x1
-; CHECK: ucvtf s0, w8
+; CHECK: and [[REG:w[0-9]+]], w0, #0x1
+; CHECK: ucvtf s0, [[REG]]
   %conv = uitofp i1 %a to float
   ret float %conv
 }
@@ -374,7 +373,8 @@ entry:
 define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i16
-; CHECK: and [[REG2:w[0-9]+]], w0, #0xffff
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xffff{{$}}
 ; CHECK: uxth w0, [[REG2]]
   %conv = trunc i64 %a to i16
   ret i16 %conv
@@ -383,7 +383,8 @@ entry:
 define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i8
-; CHECK: and [[REG2:w[0-9]+]], w0, #0xff
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xff{{$}}
 ; CHECK: uxtb w0, [[REG2]]
   %conv = trunc i64 %a to i8
   ret i8 %conv
@@ -392,7 +393,8 @@ entry:
 define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
 entry:
 ; CHECK-LABEL: i64_trunc_i1
-; CHECK: and [[REG2:w[0-9]+]], w0, #0x1
+; CHECK: mov x[[TMP:[0-9]+]], x0
+; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0x1{{$}}
 ; CHECK: and w0, [[REG2]], #0x1
   %conv = trunc i64 %a to i1
   ret i1 %conv

diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index e1e889b906c0..8d35af2737b4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -210,10 +210,10 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun
 ;
 ; FAST-LABEL: test_vcvt_high_f32_f64:
 ; FAST:       // %bb.0:
-; FAST-NEXT:    // implicit-def: $q2
 ; FAST-NEXT:    mov.16b v2, v0
-; FAST-NEXT:    fcvtn2 v2.4s, v1.2d
+; FAST-NEXT:    // implicit-def: $q0
 ; FAST-NEXT:    mov.16b v0, v2
+; FAST-NEXT:    fcvtn2 v0.4s, v1.2d
 ; FAST-NEXT:    ret
 ;
 ; GISEL-LABEL: test_vcvt_high_f32_f64:
@@ -249,10 +249,10 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou
 ;
 ; FAST-LABEL: test_vcvtx_high_f32_f64:
 ; FAST:       // %bb.0:
-; FAST-NEXT:    // implicit-def: $q2
 ; FAST-NEXT:    mov.16b v2, v0
-; FAST-NEXT:    fcvtxn2 v2.4s, v1.2d
+; FAST-NEXT:    // implicit-def: $q0
 ; FAST-NEXT:    mov.16b v0, v2
+; FAST-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; FAST-NEXT:    ret
 ;
 ; GISEL-LABEL: test_vcvtx_high_f32_f64:
@@ -283,17 +283,12 @@ define i16 @to_half(float %in) {
 ;
 ; FAST-LABEL: to_half:
 ; FAST:       // %bb.0:
-; FAST-NEXT:    sub sp, sp, #16 // =16
-; FAST-NEXT:    .cfi_def_cfa_offset 16
-; FAST-NEXT:    fcvt h0, s0
+; FAST-NEXT:    fcvt h1, s0
 ; FAST-NEXT:    // implicit-def: $w0
-; FAST-NEXT:    fmov s1, w0
-; FAST-NEXT:    mov.16b v1, v0
-; FAST-NEXT:    fmov w8, s1
-; FAST-NEXT:    mov w0, w8
-; FAST-NEXT:    str w0, [sp, #12] // 4-byte Folded Spill
-; FAST-NEXT:    mov w0, w8
-; FAST-NEXT:    add sp, sp, #16 // =16
+; FAST-NEXT:    fmov s0, w0
+; FAST-NEXT:    mov.16b v0, v1
+; FAST-NEXT:    fmov w0, s0
+; FAST-NEXT:    // kill: def $w1 killed $w0
 ; FAST-NEXT:    ret
 ;
 ; GISEL-LABEL: to_half:

diff --git a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
index 0467a2cba831..3c71ee1ee58c 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll
@@ -17,8 +17,9 @@ declare [2 x i32] @callee()
 define void @test_struct_return(i32* %addr) {
 ; CHECK-LABEL: test_struct_return:
 ; CHECK: bl _callee
-; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32
-; CHECK-DAG: str w0
+; CHECK: mov x[[COPYX0:[0-9]+]], x0
+; CHECK-DAG: lsr [[HI:x[0-9]+]], x[[COPYX0]], #32
+; CHECK-DAG: str w[[COPYX0]]
   %res = call [2 x i32] @callee()
   %res.0 = extractvalue [2 x i32] %res, 0
   store i32 %res.0, i32* %addr

diff --git a/llvm/test/CodeGen/AArch64/arm64_32-null.ll b/llvm/test/CodeGen/AArch64/arm64_32-null.ll
index 9d62c56248b5..6360b6298160 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-null.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-null.ll
@@ -13,11 +13,12 @@ define void @test_store(i8** %p) {
 define void @test_phi(i8** %p) {
 ; CHECK-LABEL: test_phi:
 ; CHECK: mov [[R1:x[0-9]+]], xzr
-; CHECK: str [[R1]], [sp]
+; CHECK: str [[R1]], [sp, #8]
 ; CHECK: b [[BB:LBB[0-9_]+]]
 ; CHECK: [[BB]]:
-; CHECK: ldr x0, [sp]
-; CHECK: str w0, [x{{.*}}]
+; CHECK: ldr x0, [sp, #8]
+; CHECK: mov w8, w0
+; CHECK: str w8, [x{{.*}}]
 
 bb0:
   br label %bb1

diff --git a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
index 46532386783f..9edf9e6d82df 100644
--- a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
+++ b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll
@@ -64,9 +64,9 @@ bb3:
 ; OPT: b.gt [[L:\.LBB[0-9_]+]]
 ; OPT: tbz w1, #0, [[L]]
 ;
+; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]]
 ; NOOPT: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
 ; NOOPT: cset [[R1:w[0-9]+]], gt
-; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]]
 ; NOOPT: str [[R1]], [sp, #[[SLOT1:[0-9]+]]]
 ; NOOPT: b .LBB
 ; NOOPT: ldr [[R2:w[0-9]+]], [sp, #[[SLOT1]]]

diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
index bfb7b5809f21..43e36dd88209 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -1,16 +1,16 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_8:
+; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     mov [[STATUS:w[3-9]+]], #0
-; CHECK:     ldaxrb [[OLD:w[0-9]+]], [x0]
+; CHECK:     ldaxrb [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], w1, uxtb
 ; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxrb [[STATUS]], w2, [x0]
+; CHECK:     stlxrb [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK: [[DONE]]:
-; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1, uxtb
 ; CHECK:     cset {{w[0-9]+}}, eq
   %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
   ret { i8, i1 } %res
@@ -18,12 +18,12 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 
 define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_16:
+; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     mov [[STATUS:w[3-9]+]], #0
-; CHECK:     ldaxrh [[OLD:w[0-9]+]], [x0]
+; CHECK:     ldaxrh [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], w1, uxth
 ; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxrh [[STATUS:w[3-9]]], w2, [x0]
+; CHECK:     stlxrh [[STATUS:w[3-9]]], w2, {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK: [[DONE]]:
 ; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1
@@ -34,12 +34,12 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
 
 define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_32:
+; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     mov [[STATUS:w[3-9]+]], #0
-; CHECK:     ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK:     ldaxr [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], w1
 ; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxr [[STATUS]], w2, [x0]
+; CHECK:     stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK: [[DONE]]:
 ; CHECK:     subs {{w[0-9]+}}, [[OLD]], w1
@@ -50,12 +50,12 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
 
 define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_64:
+; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     mov [[STATUS:w[3-9]+]], #0
-; CHECK:     ldaxr [[OLD:x[0-9]+]], [x0]
+; CHECK:     ldaxr [[OLD:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], x1
 ; CHECK:     b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxr [[STATUS]], x2, [x0]
+; CHECK:     stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK: [[DONE]]:
 ; CHECK:     subs {{x[0-9]+}}, [[OLD]], x1
@@ -66,14 +66,15 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind
 
 define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_128:
+; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
+; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD_LO]], x2
 ; CHECK:     cset [[CMP_TMP:w[0-9]+]], ne
 ; CHECK:     cmp [[OLD_HI]], x3
 ; CHECK:     cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
 ; CHECK:     cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxp [[STATUS:w[0-9]+]], x4, x5, [x0]
+; CHECK:     stlxp [[STATUS:w[0-9]+]], x4, x5, {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK: [[DONE]]:
   %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic
@@ -86,17 +87,18 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nou
 @var128 = global i128 0
 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) {
 ; CHECK-LABEL: test_cmpxchg_128_unsplit:
+; CHECK:     mov [[ADDR:x[0-9]+]], x0
 ; CHECK:     add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
 ; CHECK:     ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK:     ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
+; CHECK:     ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD_LO]], [[DESIRED_LO]]
 ; CHECK:     cset [[CMP_TMP:w[0-9]+]], ne
 ; CHECK:     cmp [[OLD_HI]], [[DESIRED_HI]]
 ; CHECK:     cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne
 ; CHECK:     cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [x0]
+; CHECK:     stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK: [[DONE]]:
 

diff --git a/llvm/test/CodeGen/AArch64/combine-loads.ll b/llvm/test/CodeGen/AArch64/combine-loads.ll
index 22a71f5701f1..c94751d77982 100644
--- a/llvm/test/CodeGen/AArch64/combine-loads.ll
+++ b/llvm/test/CodeGen/AArch64/combine-loads.ll
@@ -6,10 +6,10 @@ define <2 x i64> @z(i64* nocapture nonnull readonly %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x9, [x0, #8]
-; CHECK-NEXT:    mov v0.d[0], x8
-; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    ldr x9, [x0]
+; CHECK-NEXT:    ldr x8, [x0, #8]
+; CHECK-NEXT:    mov v0.d[0], x9
+; CHECK-NEXT:    mov v0.d[1], x8
 ; CHECK-NEXT:    ret
   %b = load i64, i64* %p
   %p2 = getelementptr i64, i64* %p, i64 1

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
index f03955c4dcd3..82e3c2d4d61a 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll
@@ -1,20 +1,19 @@
 ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s
 
 ; CHECK-LABEL: cmpxchg_monotonic_32:
+; CHECK:          mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT:     mov [[STATUS:w[0-9]+]], #0
-; CHECK-NEXT:     ldaxr [[OLD:w[0-9]+]], [x0]
-; CHECK-NEXT:     cmp [[OLD]], w1
+; CHECK-NEXT:     ldaxr w0, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT:     cmp w0, w1
 ; CHECK-NEXT:     b.ne [[DONE:.LBB[0-9_]+]]
 ; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT:     stlxr [[STATUS]], w2, [x0]
+; CHECK-NEXT:     stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}}
 ; CHECK-NEXT:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK-NEXT: [[DONE]]:
-; CHECK-NEXT:     cmp [[OLD]], w1
-; CHECK-NEXT:     cset [[STATUS:w[0-9]+]], eq
+; CHECK-NEXT:     cmp w0, w1
+; CHECK-NEXT:     cset [[STATUS]], eq
 ; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT:     str [[STATUS32]], [x3]
-; CHECK-NEXT:     mov w0, [[OLD]]
 define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
   %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic
   %tmp1 = extractvalue { i32, i1 } %tmp0, 0
@@ -26,21 +25,20 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 {
 
 ; CHECK-LABEL: cmpxchg_acq_rel_32_load:
 ; CHECK:      // %bb.0:
-; CHECK:     ldr [[NEW:w[0-9]+]], [x2]
+; CHECK:          mov [[ADDR:x[0-9]+]], x0
+; CHECK:          ldr [[NEW:w[0-9]+]], [x2]
 ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT:     mov [[STATUS:w[0-9]+]], #0
-; CHECK-NEXT:     ldaxr [[OLD:w[0-9]+]], [x0]
-; CHECK-NEXT:     cmp [[OLD]], w1
+; CHECK-NEXT:     ldaxr w0, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT:     cmp w0, w1
 ; CHECK-NEXT:     b.ne [[DONE:.LBB[0-9_]+]]
 ; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT:     stlxr [[STATUS]], [[NEW]], [x0]
+; CHECK-NEXT:     stlxr [[STATUS:w[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK-NEXT:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK-NEXT: [[DONE]]:
-; CHECK-NEXT:     cmp [[OLD]], w1
-; CHECK-NEXT:     cset [[STATUS:w[0-9]+]], eq
+; CHECK-NEXT:     cmp w0, w1
+; CHECK-NEXT:     cset [[STATUS]], eq
 ; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT:     str [[STATUS32]], [x3]
-; CHECK-NEXT:     mov w0, [[OLD]]
 define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 {
   %new = load i32, i32* %pnew
   %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire
@@ -52,20 +50,19 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0
 }
 
 ; CHECK-LABEL: cmpxchg_seq_cst_64:
+; CHECK: mov [[ADDR:x[0-9]+]], x0
 ; CHECK: [[RETRY:.LBB[0-9_]+]]:
-; CHECK-NEXT:     mov [[STATUS:w[0-9]+]], #0
-; CHECK-NEXT:     ldaxr [[OLD:x[0-9]+]], [x0]
-; CHECK-NEXT:     cmp [[OLD]], x1
+; CHECK-NEXT:     ldaxr x0, {{\[}}[[ADDR]]{{\]}}
+; CHECK-NEXT:     cmp x0, x1
 ; CHECK-NEXT:     b.ne [[DONE:.LBB[0-9_]+]]
 ; CHECK-NEXT: // %bb.2:
-; CHECK-NEXT:     stlxr [[STATUS]], x2, [x0]
+; CHECK-NEXT:     stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}}
 ; CHECK-NEXT:     cbnz [[STATUS]], [[RETRY]]
 ; CHECK-NEXT: [[DONE]]:
-; CHECK-NEXT:     cmp [[OLD]], x1
+; CHECK-NEXT:     cmp x0, x1
 ; CHECK-NEXT:     cset [[STATUS:w[0-9]+]], eq
 ; CHECK-NEXT:     and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1
 ; CHECK-NEXT:     str [[STATUS32]], [x3]
-; CHECK-NEXT:     mov x0, [[OLD]]
 define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 {
   %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst
   %tmp1 = extractvalue { i64, i1 } %tmp0, 0

diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 105969717e46..2e5e988f0576 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -6,15 +6,15 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) {
 ; CHECK-LABEL: popcount128:
 ; CHECK:       // %bb.0: // %Entry
 ; CHECK-NEXT:    ldr x8, [x0, #8]
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    mov v1.d[1], x8
-; CHECK-NEXT:    cnt v0.16b, v1.16b
-; CHECK-NEXT:    uaddlv h0, v0.16b
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    fmov w0, s1
+; CHECK-NEXT:    ldr d1, [x0]
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    cnt v0.16b, v0.16b
+; CHECK-NEXT:    uaddlv h1, v0.16b
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
 Entry:
   %1 = load i128, i128* %0, align 16
@@ -32,24 +32,24 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) {
 ; CHECK:       // %bb.0: // %Entry
 ; CHECK-NEXT:    ldr x8, [x0, #8]
 ; CHECK-NEXT:    ldr x9, [x0, #24]
-; CHECK-NEXT:    ldr d0, [x0, #16]
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    mov v1.d[1], x9
-; CHECK-NEXT:    cnt v0.16b, v1.16b
-; CHECK-NEXT:    uaddlv h0, v0.16b
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    fmov w9, s1
-; CHECK-NEXT:    ldr d0, [x0]
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    mov v1.d[1], x8
-; CHECK-NEXT:    cnt v0.16b, v1.16b
-; CHECK-NEXT:    uaddlv h0, v0.16b
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    ldr d1, [x0, #16]
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.d[1], x9
+; CHECK-NEXT:    cnt v0.16b, v0.16b
+; CHECK-NEXT:    uaddlv h1, v0.16b
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov w9, s0
+; CHECK-NEXT:    ldr d1, [x0]
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    cnt v0.16b, v0.16b
+; CHECK-NEXT:    uaddlv h1, v0.16b
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    add w0, w8, w9
 ; CHECK-NEXT:    ret
 Entry:
@@ -69,10 +69,10 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    mov v0.d[1], x1
 ; CHECK-NEXT:    cnt v0.16b, v0.16b
-; CHECK-NEXT:    uaddlv h0, v0.16b
-; CHECK-NEXT:    // implicit-def: $q1
-; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    fmov w0, s1
+; CHECK-NEXT:    uaddlv h1, v0.16b
+; CHECK-NEXT:    // implicit-def: $q0
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    // kill: def $x0 killed $w0
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov x1, v0.d[1]

diff --git a/llvm/test/CodeGen/AArch64/swift-return.ll b/llvm/test/CodeGen/AArch64/swift-return.ll
index 2bf5e379b379..2036faf39bdd 100644
--- a/llvm/test/CodeGen/AArch64/swift-return.ll
+++ b/llvm/test/CodeGen/AArch64/swift-return.ll
@@ -203,10 +203,10 @@ declare swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6()
 ; CHECK-DAG:   mov      w3, w0
 ; CHECK:   ret
 ; CHECK-O0-LABEL: _gen7
-; CHECK-O0:  str     w0, [sp, #12]
-; CHECK-O0:  ldr     w1, [sp, #12]
-; CHECK-O0:  ldr     w2, [sp, #12]
-; CHECK-O0:  ldr     w3, [sp, #12]
+; CHECK-O0:  mov w3, w0
+; CHECK-O0:  mov w0, w3
+; CHECK-O0:  mov w1, w3
+; CHECK-O0:  mov w2, w3
 define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) {
   %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0
   %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1
@@ -221,10 +221,10 @@ define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) {
 ; CHECK:  mov      w3, w0
 ; CHECK:  ret
 ; CHECK-O0-LABEL: _gen9
-; CHECK-O0:  str     w0, [sp, #12]
-; CHECK-O0:  ldr     w1, [sp, #12]
-; CHECK-O0:  ldr     w2, [sp, #12]
-; CHECK-O0:  ldr     w3, [sp, #12]
+; CHECK-O0:  mov w3, w0
+; CHECK-O0:  mov w0, w3
+; CHECK-O0:  mov w1, w3
+; CHECK-O0:  mov w2, w3
 define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) {
   %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0
   %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1

diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index a8635f682ff1..e219ef770f93 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -21,11 +21,10 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-O0-LABEL: foo:
 ; CHECK-O0: mov w{{.*}}, #16
 ; CHECK-O0: malloc
-; CHECK-O0: mov x1, x0
-; CHECK-O0-NOT: x1
+; CHECK-O0: mov x21, x0
+; CHECK-O0-NOT: x21
 ; CHECK-O0: mov [[ID:w[0-9]+]], #1
 ; CHECK-O0: strb [[ID]], [x0, #8]
-; CHECK-O0: mov x21, x1
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -138,14 +137,12 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-O0: cbz w0
 ; CHECK-O0: mov w{{.*}}, #16
 ; CHECK-O0: malloc
-; CHECK-O0: mov [[ID:x[0-9]+]], x0
+; CHECK-O0: mov x21, x0
 ; CHECK-O0: mov [[ID2:w[0-9]+]], #1
 ; CHECK-O0: strb [[ID2]], [x0, #8]
-; CHECK-O0: mov x21, [[ID]]
 ; CHECK-O0: ret
 ; reload from stack
-; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]]
-; CHECK-O0: mov x21, [[ID3]]
+; CHECK-O0: ldr x21, [sp, [[SLOT]]]
 ; CHECK-O0: ret
 entry:
   %cond = icmp ne i32 %cc, 0
@@ -179,10 +176,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
 
 ; CHECK-O0-AARCH64-LABEL: foo_loop:
 ; spill x21
-; CHECK-O0-AARCH64: str x21, [sp, [[SLOT:#[0-9]+]]]
+; CHECK-O0-AARCH64: stur x21, [x29, [[SLOT:#-[0-9]+]]]
 ; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]]
 ; CHECK-O0-AARCH64: [[BB1]]:
-; CHECK-O0-AARCH64: ldr     x0, [sp, [[SLOT]]]
+; CHECK-O0-AARCH64: ldur    x0, [x29, [[SLOT]]]
 ; CHECK-O0-AARCH64: str     x0, [sp, [[SLOT2:#[0-9]+]]]
 ; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]]
 ; CHECK-O0-AARCH64: mov w{{.*}}, #16
@@ -194,11 +191,10 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
 ; CHECK-O0-AARCH64:[[BB2]]:
 ; CHECK-O0-AARCH64: ldr     x0, [sp, [[SLOT2]]]
 ; CHECK-O0-AARCH64: fcmp
-; CHECK-O0-AARCH64: str     x0, [sp]
+; CHECK-O0-AARCH64: stur     x0, [x29, [[SLOT]]]
 ; CHECK-O0-AARCH64: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0-AARCH64: ldr [[ID3:x[0-9]+]], [sp]
-; CHECK-O0-AARCH64: mov x21, [[ID3]]
+; CHECK-O0-AARCH64: ldr x21, [sp]
 ; CHECK-O0-AARCH64: ret
 
 ; CHECK-O0-ARM64_32-LABEL: foo_loop:
@@ -215,14 +211,12 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
 ; CHECK-O0-ARM64_32: strb w{{.*}},
 ; CHECK-O0-ARM64_32:[[BB2]]:
 ; CHECK-O0-ARM64_32: ldr     x0, [sp, [[SLOT2]]]
-; CHECK-O0-ARM64_32: fcmp
 ; CHECK-O0-ARM64_32: str     x0, [sp[[OFFSET:.*]]]
+; CHECK-O0-ARM64_32: fcmp
 ; CHECK-O0-ARM64_32: b.le [[BB1]]
 ; reload from stack
-; CHECK-O0-ARM64_32: ldr [[ID3:x[0-9]+]], [sp[[OFFSET]]]
-; CHECK-O0-ARM64_32: mov x21, [[ID3]]
+; CHECK-O0-ARM64_32: ldr x21, [sp[[OFFSET]]]
 ; CHECK-O0-ARM64_32: ret
-
 entry:
   br label %bb_loop
 
@@ -261,16 +255,16 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
 ; CHECK-APPLE-NOT: x21
 
 ; CHECK-O0-LABEL: foo_sret:
-; CHECK-O0: mov w{{.*}}, #16
 ; spill x8
 ; CHECK-O0-DAG: str x8
+; CHECK-O0: mov w{{.*}}, #16
 ; CHECK-O0: malloc
+; CHECK-O0: mov	x10, x0
+; CHECK-O0: mov	x21, x10
 ; CHECK-O0: mov [[ID:w[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [x0, #8]
+; CHECK-O0: strb [[ID]], [x10, #8]
 ; reload from stack
-; CHECK-O0: ldr [[SRET:x[0-9]+]]
-; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4]
-; CHECK-O0: mov x21
+; CHECK-O0: str w{{.*}}, [x8, #4]
 ; CHECK-O0-NOT: x21
 entry:
   %call = call i8* @malloc(i64 16)
@@ -299,7 +293,7 @@ define float @caller3(i8* %error_ref) {
 
 ; CHECK-O0-LABEL: caller3:
 ; spill x0
-; CHECK-O0: str x0
+; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]]
 ; CHECK-O0: mov x21
 ; CHECK-O0: bl {{.*}}foo_sret
 ; CHECK-O0: mov [[ID2:x[0-9]+]], x21
@@ -307,8 +301,8 @@ define float @caller3(i8* %error_ref) {
 ; CHECK-O0-ARM64_32: cmp x21, #0
 ; Access part of the error object and save it to error_ref
 ; reload from stack
+; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]]
 ; CHECK-O0: ldrb [[CODE:w[0-9]+]]
-; CHECK-O0: ldr [[ID:x[0-9]+]]
 ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
 ; CHECK-O0: bl {{.*}}free
 entry:
@@ -630,11 +624,10 @@ declare swiftcc void @foo2(%swift_error** swifterror)
 
 ; Make sure we properly assign registers during fast-isel.
 ; CHECK-O0-LABEL: testAssign
-; CHECK-O0: mov     [[TMP:x.*]], xzr
-; CHECK-O0: mov     x21, [[TMP]]
+; CHECK-O0: mov     x21, xzr
 ; CHECK-O0: bl      _foo2
 ; CHECK-O0: str     x21, [s[[STK:.*]]]
-; CHECK-O0: ldr     x0, [s[[STK]]]
+; CHECK-O0: ldr x{{[0-9]+}}, [s[[STK]]]
 
 ; CHECK-APPLE-LABEL: testAssign
 ; CHECK-APPLE: mov      x21, xzr

diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
index aacc3c6542c7..7642c826acff 100644
--- a/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir
@@ -81,14 +81,14 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $b21, -240
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $b22, -256
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $b23, -272
+  ; CHECK:   STRQui $q0, $sp, 0 :: (store 16 into %stack.1)
   ; CHECK:   EH_LABEL <mcsymbol .Ltmp0>
-  ; CHECK:   STRQui $q0, $sp, 1 :: (store 16 into %stack.0)
-  ; CHECK:   BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit killed $q0, implicit-def $q0
+  ; CHECK:   BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $q0
+  ; CHECK:   STRQui killed $q0, $sp, 1 :: (store 16 into %stack.0)
   ; CHECK:   EH_LABEL <mcsymbol .Ltmp1>
-  ; CHECK:   STRQui killed $q0, $sp, 0 :: (store 16 into %stack.1)
   ; CHECK:   B %bb.1
   ; CHECK: bb.1..Lcontinue:
-  ; CHECK:   $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1)
+  ; CHECK:   $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0)
   ; CHECK:   $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2)
   ; CHECK:   $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4)
   ; CHECK:   $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6)
@@ -103,7 +103,7 @@ body:             |
   ; CHECK: bb.2..Lunwind (landing-pad):
   ; CHECK:   liveins: $x0, $x1
   ; CHECK:   EH_LABEL <mcsymbol .Ltmp2>
-  ; CHECK:   $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0)
+  ; CHECK:   $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1)
   ; CHECK:   $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2)
   ; CHECK:   $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4)
   ; CHECK:   $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6)

diff --git a/llvm/test/CodeGen/AArch64/unwind-preserved.ll b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
index 68fec0825542..33bbdfaa2cfd 100644
--- a/llvm/test/CodeGen/AArch64/unwind-preserved.ll
+++ b/llvm/test/CodeGen/AArch64/unwind-preserved.ll
@@ -50,14 +50,14 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
 ; CHECK-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:  .Ltmp0:
-; CHECK-NEXT:    str z0, [sp, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl may_throw_sve
 ; CHECK-NEXT:  .Ltmp1:
-; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    str z0, [sp, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    b .LBB0_1
 ; CHECK-NEXT:  .LBB0_1: // %.Lcontinue
-; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -92,7 +92,7 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB0_2: // %.Lunwind
 ; CHECK-NEXT:  .Ltmp2:
-; CHECK-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -172,14 +172,14 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
 ; GISEL-NEXT:    .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset w29, -16
+; GISEL-NEXT:    str z0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:  .Ltmp0:
-; GISEL-NEXT:    str z0, [sp, #1, mul vl] // 16-byte Folded Spill
 ; GISEL-NEXT:    bl may_throw_sve
 ; GISEL-NEXT:  .Ltmp1:
-; GISEL-NEXT:    str z0, [sp] // 16-byte Folded Spill
+; GISEL-NEXT:    str z0, [sp, #1, mul vl] // 16-byte Folded Spill
 ; GISEL-NEXT:    b .LBB0_1
 ; GISEL-NEXT:  .LBB0_1: // %.Lcontinue
-; GISEL-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
+; GISEL-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
 ; GISEL-NEXT:    addvl sp, sp, #2
 ; GISEL-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
 ; GISEL-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -214,7 +214,7 @@ define <vscale x 4 x i32> @invoke_callee_may_throw_sve(<vscale x 4 x i32> %v) pe
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  .LBB0_2: // %.Lunwind
 ; GISEL-NEXT:  .Ltmp2:
-; GISEL-NEXT:    ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
+; GISEL-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
 ; GISEL-NEXT:    addvl sp, sp, #2
 ; GISEL-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
 ; GISEL-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
@@ -293,14 +293,14 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
 ; CHECK-NEXT:    .cfi_offset b21, -240
 ; CHECK-NEXT:    .cfi_offset b22, -256
 ; CHECK-NEXT:    .cfi_offset b23, -272
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:  .Ltmp3:
-; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl may_throw_neon
 ; CHECK-NEXT:  .Ltmp4:
-; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    b .LBB1_1
 ; CHECK-NEXT:  .LBB1_1: // %.Lcontinue
-; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; CHECK-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload
@@ -314,7 +314,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB1_2: // %.Lunwind
 ; CHECK-NEXT:  .Ltmp5:
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; CHECK-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload
@@ -360,13 +360,13 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
 ; GISEL-NEXT:    .cfi_offset b21, -240
 ; GISEL-NEXT:    .cfi_offset b22, -256
 ; GISEL-NEXT:    .cfi_offset b23, -272
+; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:  .Ltmp3:
-; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    bl may_throw_neon
+; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:  .Ltmp4:
-; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:  // %bb.1: // %.Lcontinue
-; GISEL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; GISEL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; GISEL-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload
@@ -380,7 +380,7 @@ define aarch64_vector_pcs <4 x i32> @invoke_callee_may_throw_neon(<4 x i32> %v)
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  .LBB1_2: // %.Lunwind
 ; GISEL-NEXT:  .Ltmp5:
-; GISEL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; GISEL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldp q9, q8, [sp, #256] // 32-byte Folded Reload
 ; GISEL-NEXT:    ldp q11, q10, [sp, #224] // 32-byte Folded Reload

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
index 587f808bc55e..6515d25f7415 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
@@ -28,15 +28,15 @@ define i32 @test_sgpr_matching_constraint() nounwind {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    s_mov_b32 s5, 7
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    s_mov_b32 s5, 8
+; CHECK-NEXT:    s_mov_b32 s4, 8
 ; CHECK-NEXT:    ;;#ASMEND
 ; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    s_add_u32 s5, s4, s5
+; CHECK-NEXT:    s_add_u32 s4, s5, s4
 ; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    v_mov_b32_e32 v0, s5
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind

diff  --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 6da332a596fb..22a4fc98b436 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -17,29 +17,28 @@
 ; GCN: s_mov_b32 m0, -1
 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
 
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
-
 ; Spill load
 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
 
 ; Spill saved exec
+; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
 ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
 ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
 
 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0
 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
-; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:20 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
 
+; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
 ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
 
 ; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
 
 ; GCN: ; %bb.{{[0-9]+}}: ; %if
+; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
 ; GCN: s_mov_b32 m0, -1
 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]]
-; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0)
 
 
@@ -53,9 +52,7 @@
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
 
-
-
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:20 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload
 ; VMEM: s_waitcnt vmcnt(0)
 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
@@ -88,29 +85,26 @@ endif:
 ; VGPR: workitem_private_segment_byte_size = 16{{$}}
 
 ; GCN: {{^}}; %bb.0:
-
-; GCN: s_mov_b32 m0, -1
-; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
-
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
-
-; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
-; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
+; GCN-DAG: s_mov_b32 m0, -1
+; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}}
+; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]]
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0
 
 ; Spill load
 ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
 
 ; Spill saved exec
 ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
 ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
 
-
 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0
 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1
-; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:24 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
 
-; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
 
+; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
+; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
 ; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
 
 
@@ -127,7 +121,7 @@ endif:
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
 
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:24 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload
 ; VMEM: s_waitcnt vmcnt(0)
 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0
 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
@@ -139,7 +133,7 @@ endif:
 define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %load0 = load volatile i32, i32 addrspace(3)* undef
+  %load0 = load volatile i32, i32 addrspace(3)* null
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %loop, label %end
 
@@ -161,8 +155,12 @@ end:
 ; GCN-LABEL: {{^}}divergent_if_else_endif:
 ; GCN: {{^}}; %bb.0:
 
-; GCN: s_mov_b32 m0, -1
-; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
+; GCN-DAG: s_mov_b32 m0, -1
+; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}}
+; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]]
+
+; Spill load
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
 
 ; GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0
 ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], [[ZERO]], v0
@@ -171,9 +169,6 @@ end:
 ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
 ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
 
-; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-
 ; Spill saved exec
 ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
 ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
@@ -192,7 +187,6 @@ end:
 ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
 
-
 ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[SAVEEXEC_OFFSET]]
 ; VMEM: s_waitcnt vmcnt(0)
 ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0
@@ -219,8 +213,8 @@ end:
 
 
 ; GCN: ; %bb.{{[0-9]+}}: ; %if
-; GCN: ds_read_b32
 ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
+; GCN: ds_read_b32
 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
 ; GCN: buffer_store_dword [[ADD]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
 ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
@@ -248,7 +242,7 @@ end:
 define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %load0 = load volatile i32, i32 addrspace(3)* undef
+  %load0 = load volatile i32, i32 addrspace(3)* null
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %if, label %else
 

diff --git a/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
new file mode 100644
index 000000000000..d50de439d47d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s
+
+# Make sure incorrect kills aren't emitted on vcc
+
+---
+name:            foo
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: foo
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; CHECK: $sgpr4_sgpr5 = COPY $vcc
+    ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $vcc, implicit $exec
+    ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed $sgpr4_sgpr5
+    %0:vgpr_32 = COPY $vgpr0
+    V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec
+    $sgpr4_sgpr5 = COPY $vcc
+    %1:sreg_64_xexec = COPY $vcc
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec
+    $vgpr0 = COPY %2
+    S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
+
+...
+
+# This would hit "Unexpected reg unit state" assert.
+---
+name:            bar
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: bar
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
+    ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
+    ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
+    ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
+    ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec
+    ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc
+    %0:vgpr_32 = COPY $vgpr0
+    V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec
+    %3:sreg_64_xexec = COPY $vcc
+    %1:sreg_64_xexec = COPY $vcc
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec
+    $vgpr0 = COPY %2
+    S_ENDPGM 0, implicit $vgpr0, implicit %3
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir
new file mode 100644
index 000000000000..bf32ebaf473d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir
@@ -0,0 +1,27 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s
+
+# This would hit "Illegal subregister index for physical register" verifier error since
+# tied operands would skip dropping the subregister index.
+
+---
+name:   invalid_subreg_index
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+
+    ; CHECK-LABEL: name: invalid_subreg_index
+    ; CHECK: liveins: $vgpr0, $sgpr0
+    ; CHECK: $m0 = COPY renamable $sgpr0
+    ; CHECK: undef renamable $vgpr1 = V_INTERP_P2_F32 undef $vgpr1, undef $vgpr0, 0, 1, implicit $mode, implicit $m0, implicit $exec, implicit-def dead $vgpr0_vgpr1
+    ; CHECK: S_ENDPGM 0, implicit killed renamable $sgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sgpr_32 = COPY $sgpr0
+    $m0 = COPY %1
+    undef %2.sub1:vreg_64 = V_INTERP_P2_F32 undef %2.sub1, undef %0:vgpr_32, 0, 1, implicit $mode, implicit $m0, implicit $exec
+    S_ENDPGM 0, implicit %1
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
index 32de26283781..7e70eb3a952c 100644
--- a/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
+++ b/llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir
@@ -18,7 +18,7 @@ body:             |
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
   ; GCN:   renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
-  ; GCN:   GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
   ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; GCN: bb.2:
   ; GCN:   S_ENDPGM 0
@@ -53,9 +53,10 @@ body:             |
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
   ; GCN:   renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
-  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec
   ; GCN:   renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec
-  ; GCN:   GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec
+  ; GCN:   SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec
   ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; GCN: bb.2:
   ; GCN:   S_ENDPGM 0
@@ -92,9 +93,10 @@ body:             |
   ; GCN:   SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
   ; GCN: bb.1:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN:   renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
-  ; GCN:   $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
-  ; GCN:   GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec
+  ; GCN:   $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
+  ; GCN:   renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
+  ; GCN:   SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec
   ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; GCN: bb.2:
   ; GCN:   S_ENDPGM 0
@@ -128,9 +130,9 @@ body:             |
   ; GCN: bb.1:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
-  ; GCN:   GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
-  ; GCN:   renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec
-  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec
+  ; GCN:   renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
   ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; GCN: bb.2:
   ; GCN:   S_ENDPGM 0
@@ -164,9 +166,8 @@ body:             |
   ; GCN: bb.1:
   ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN:   $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
-  ; GCN:   undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3
-  ; GCN:   GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec
-  ; GCN:   SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5)
+  ; GCN:   undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3
+  ; GCN:   GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, 0, 0, implicit $exec
   ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
   ; GCN: bb.2:
   ; GCN:   S_ENDPGM 0

diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index 3d3b511ab34b..c6ba50706812 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -12,101 +12,96 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
   ; GCN: bb.0.entry:
   ; GCN:   successors: %bb.1(0x80000000)
   ; GCN:   liveins: $vgpr0, $sgpr0_sgpr1
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
   ; GCN:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
-  ; GCN:   renamable $sgpr2 = COPY renamable $sgpr1
+  ; GCN:   renamable $sgpr6 = COPY renamable $sgpr1
   ; GCN:   renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
-  ; GCN:   renamable $sgpr1 = S_MOV_B32 61440
-  ; GCN:   renamable $sgpr3 = S_MOV_B32 -1
-  ; GCN:   undef renamable $sgpr4 = COPY killed renamable $sgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
-  ; GCN:   renamable $sgpr5 = COPY killed renamable $sgpr2
-  ; GCN:   renamable $sgpr6 = COPY killed renamable $sgpr3
-  ; GCN:   renamable $sgpr7 = COPY killed renamable $sgpr1
+  ; GCN:   renamable $sgpr4 = S_MOV_B32 61440
+  ; GCN:   renamable $sgpr5 = S_MOV_B32 -1
+  ; GCN:   undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   renamable $sgpr1 = COPY killed renamable $sgpr6
+  ; GCN:   renamable $sgpr2 = COPY killed renamable $sgpr5
+  ; GCN:   renamable $sgpr3 = COPY killed renamable $sgpr4
+  ; GCN:   SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.2, align 4, addrspace 5)
   ; GCN:   renamable $sgpr0 = S_MOV_B32 16
   ; GCN:   renamable $sgpr1 = S_MOV_B32 15
   ; GCN:   renamable $sgpr2 = S_MOV_B32 14
   ; GCN:   renamable $sgpr3 = S_MOV_B32 13
-  ; GCN:   renamable $sgpr8 = S_MOV_B32 12
-  ; GCN:   renamable $sgpr9 = S_MOV_B32 11
-  ; GCN:   renamable $sgpr10 = S_MOV_B32 10
-  ; GCN:   renamable $sgpr11 = S_MOV_B32 9
-  ; GCN:   renamable $sgpr12 = S_MOV_B32 8
-  ; GCN:   renamable $sgpr13 = S_MOV_B32 7
-  ; GCN:   renamable $sgpr14 = S_MOV_B32 6
-  ; GCN:   renamable $sgpr15 = S_MOV_B32 5
-  ; GCN:   renamable $sgpr16 = S_MOV_B32 3
-  ; GCN:   renamable $sgpr17 = S_MOV_B32 2
-  ; GCN:   renamable $sgpr18 = S_MOV_B32 1
-  ; GCN:   renamable $sgpr19 = S_MOV_B32 0
-  ; GCN:   renamable $vgpr1 = COPY killed renamable $sgpr19
-  ; GCN:   renamable $vgpr2 = COPY killed renamable $sgpr18
-  ; GCN:   renamable $vgpr3 = COPY killed renamable $sgpr17
-  ; GCN:   renamable $vgpr4 = COPY killed renamable $sgpr16
-  ; GCN:   renamable $vgpr5 = COPY killed renamable $sgpr15
-  ; GCN:   renamable $vgpr6 = COPY killed renamable $sgpr14
-  ; GCN:   renamable $vgpr7 = COPY killed renamable $sgpr13
-  ; GCN:   renamable $vgpr8 = COPY killed renamable $sgpr12
-  ; GCN:   renamable $vgpr9 = COPY killed renamable $sgpr11
-  ; GCN:   renamable $vgpr10 = COPY killed renamable $sgpr10
-  ; GCN:   renamable $vgpr11 = COPY killed renamable $sgpr9
-  ; GCN:   renamable $vgpr12 = COPY killed renamable $sgpr8
-  ; GCN:   renamable $vgpr13 = COPY killed renamable $sgpr3
-  ; GCN:   renamable $vgpr14 = COPY killed renamable $sgpr2
-  ; GCN:   renamable $vgpr15 = COPY killed renamable $sgpr1
+  ; GCN:   renamable $sgpr4 = S_MOV_B32 12
+  ; GCN:   renamable $sgpr5 = S_MOV_B32 11
+  ; GCN:   renamable $sgpr6 = S_MOV_B32 10
+  ; GCN:   renamable $sgpr7 = S_MOV_B32 9
+  ; GCN:   renamable $sgpr8 = S_MOV_B32 8
+  ; GCN:   renamable $sgpr9 = S_MOV_B32 7
+  ; GCN:   renamable $sgpr10 = S_MOV_B32 6
+  ; GCN:   renamable $sgpr11 = S_MOV_B32 5
+  ; GCN:   renamable $sgpr12 = S_MOV_B32 3
+  ; GCN:   renamable $sgpr13 = S_MOV_B32 2
+  ; GCN:   renamable $sgpr14 = S_MOV_B32 1
+  ; GCN:   renamable $sgpr15 = S_MOV_B32 0
+  ; GCN:   renamable $vgpr0 = COPY killed renamable $sgpr15
+  ; GCN:   renamable $vgpr30 = COPY killed renamable $sgpr14
+  ; GCN:   renamable $vgpr29 = COPY killed renamable $sgpr13
+  ; GCN:   renamable $vgpr28 = COPY killed renamable $sgpr12
+  ; GCN:   renamable $vgpr27 = COPY killed renamable $sgpr11
+  ; GCN:   renamable $vgpr26 = COPY killed renamable $sgpr10
+  ; GCN:   renamable $vgpr25 = COPY killed renamable $sgpr9
+  ; GCN:   renamable $vgpr24 = COPY killed renamable $sgpr8
+  ; GCN:   renamable $vgpr23 = COPY killed renamable $sgpr7
+  ; GCN:   renamable $vgpr22 = COPY killed renamable $sgpr6
+  ; GCN:   renamable $vgpr21 = COPY killed renamable $sgpr5
+  ; GCN:   renamable $vgpr20 = COPY killed renamable $sgpr4
+  ; GCN:   renamable $vgpr19 = COPY killed renamable $sgpr3
+  ; GCN:   renamable $vgpr18 = COPY killed renamable $sgpr2
+  ; GCN:   renamable $vgpr17 = COPY killed renamable $sgpr1
   ; GCN:   renamable $vgpr16 = COPY killed renamable $sgpr0
-  ; GCN:   undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32
-  ; GCN:   renamable $vgpr18 = COPY killed renamable $vgpr2
-  ; GCN:   renamable $vgpr19 = COPY killed renamable $vgpr3
-  ; GCN:   renamable $vgpr20 = COPY killed renamable $vgpr4
-  ; GCN:   renamable $vgpr21 = COPY killed renamable $vgpr5
-  ; GCN:   renamable $vgpr22 = COPY killed renamable $vgpr6
-  ; GCN:   renamable $vgpr23 = COPY killed renamable $vgpr7
-  ; GCN:   renamable $vgpr24 = COPY killed renamable $vgpr8
-  ; GCN:   renamable $vgpr25 = COPY killed renamable $vgpr9
-  ; GCN:   renamable $vgpr26 = COPY killed renamable $vgpr10
-  ; GCN:   renamable $vgpr27 = COPY killed renamable $vgpr11
-  ; GCN:   renamable $vgpr28 = COPY killed renamable $vgpr12
-  ; GCN:   renamable $vgpr29 = COPY killed renamable $vgpr13
-  ; GCN:   renamable $vgpr30 = COPY killed renamable $vgpr14
-  ; GCN:   renamable $vgpr31 = COPY killed renamable $vgpr15
-  ; GCN:   renamable $vgpr32 = COPY killed renamable $vgpr16
+  ; GCN:   undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+  ; GCN:   renamable $vgpr1 = COPY killed renamable $vgpr30
+  ; GCN:   renamable $vgpr2 = COPY killed renamable $vgpr29
+  ; GCN:   renamable $vgpr3 = COPY killed renamable $vgpr28
+  ; GCN:   renamable $vgpr4 = COPY killed renamable $vgpr27
+  ; GCN:   renamable $vgpr5 = COPY killed renamable $vgpr26
+  ; GCN:   renamable $vgpr6 = COPY killed renamable $vgpr25
+  ; GCN:   renamable $vgpr7 = COPY killed renamable $vgpr24
+  ; GCN:   renamable $vgpr8 = COPY killed renamable $vgpr23
+  ; GCN:   renamable $vgpr9 = COPY killed renamable $vgpr22
+  ; GCN:   renamable $vgpr10 = COPY killed renamable $vgpr21
+  ; GCN:   renamable $vgpr11 = COPY killed renamable $vgpr20
+  ; GCN:   renamable $vgpr12 = COPY killed renamable $vgpr19
+  ; GCN:   renamable $vgpr13 = COPY killed renamable $vgpr18
+  ; GCN:   renamable $vgpr14 = COPY killed renamable $vgpr17
+  ; GCN:   renamable $vgpr15 = COPY killed renamable $vgpr16
+  ; GCN:   SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.1, align 4, addrspace 5)
   ; GCN:   renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
-  ; GCN:   renamable $vgpr1 = IMPLICIT_DEF
-  ; GCN:   renamable $sgpr2_sgpr3 = IMPLICIT_DEF
-  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
-  ; GCN:   SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5)
-  ; GCN:   SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
-  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5)
-  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
-  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
+  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
+  ; GCN:   renamable $vgpr0 = IMPLICIT_DEF
+  ; GCN:   renamable $sgpr0_sgpr1 = IMPLICIT_DEF
   ; GCN: bb.1:
   ; GCN:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
-  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5)
-  ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
-  ; GCN:   $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-  ; GCN:   renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-  ; GCN:   renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
-  ; GCN:   renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.4, align 4, addrspace 5)
+  ; GCN:   $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5)
+  ; GCN:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.1, align 4, addrspace 5)
+  ; GCN:   $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
+  ; GCN:   renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
+  ; GCN:   renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec
+  ; GCN:   renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN:   S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit-def undef $mode, implicit $m0, implicit $mode
-  ; GCN:   $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5)
-  ; GCN:   renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
+  ; GCN:   renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0
+  ; GCN:   SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5)
   ; GCN:   S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode
-  ; GCN:   renamable $vgpr19 = COPY renamable $vgpr18
-  ; GCN:   renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5
-  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5)
-  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5)
-  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
-  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
-  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
-  ; GCN:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5)
+  ; GCN:   renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
+  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5)
+  ; GCN:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
   ; GCN:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
   ; GCN: bb.3:
   ; GCN:   successors: %bb.2(0x80000000)
-  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.3, align 4, addrspace 5)
-  ; GCN:   $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
+  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
+  ; GCN:   $exec = S_MOV_B64 renamable $sgpr0_sgpr1
   ; GCN: bb.2:
-  ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.1, align 4, addrspace 5)
-  ; GCN:   BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+  ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5)
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5)
+  ; GCN:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
   ; GCN:   S_ENDPGM 0
 entry:
   %id = call i32 @llvm.amdgcn.workitem.id.x() #1

diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 230cd8eb5b0d..4dfc9bce69aa 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -236,17 +236,18 @@ entry:
 ; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s{{[0-9]+}}
 ; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s{{[0-9]+}}
 ; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
-; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
+; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
 
-; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]:
-; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
+; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; W64-O0: s_waitcnt vmcnt(0)
-; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]]
+; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]]
 ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
-; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]]
+; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]]
 ; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]]
 ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
 ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]]
@@ -255,37 +256,37 @@ entry:
 ; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]]
 ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
 ; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
-; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]]
 ; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]]
 ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
 ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
 ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload
 ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen
 ; W64-O0: s_waitcnt vmcnt(0)
 ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
 ; W64-O0: s_xor_b64 exec, exec, [[SAVE]]
 ; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB0]]
-; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]]
+
+; XXX-W64-O0: s_mov_b64 exec, [[SAVEEXEC]]
 ; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
 ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
 
-; W64-O0: ; %bb.{{[0-9]+}}:
+; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1
+; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec
-; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
 ; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
-; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
+; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
 
-; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]:
-; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
+; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32  offset:[[IDX_OFF]] ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; W64-O0: s_waitcnt vmcnt(0)
-; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]]
+; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]]
 ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
-; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]]
+; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]]
 ; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]]
 ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}}
 ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]]
@@ -294,12 +295,10 @@ entry:
 ; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]]
 ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
 ; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
-; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]]
 ; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]]
 ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
 ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
 ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload
 ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen
 ; W64-O0: s_waitcnt vmcnt(0)
 ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill

diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index b119ffd303e0..dccee0a298a3 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -15,381 +15,379 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out,
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 0
+; GCN-NEXT:    v_writelane_b32 v0, s5, 1
+; GCN-NEXT:    v_writelane_b32 v0, s6, 2
+; GCN-NEXT:    v_writelane_b32 v0, s7, 3
+; GCN-NEXT:    v_writelane_b32 v0, s8, 4
+; GCN-NEXT:    v_writelane_b32 v0, s9, 5
+; GCN-NEXT:    v_writelane_b32 v0, s10, 6
+; GCN-NEXT:    v_writelane_b32 v0, s11, 7
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[12:19]
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 8
+; GCN-NEXT:    v_writelane_b32 v0, s5, 9
+; GCN-NEXT:    v_writelane_b32 v0, s6, 10
+; GCN-NEXT:    v_writelane_b32 v0, s7, 11
+; GCN-NEXT:    v_writelane_b32 v0, s8, 12
+; GCN-NEXT:    v_writelane_b32 v0, s9, 13
+; GCN-NEXT:    v_writelane_b32 v0, s10, 14
+; GCN-NEXT:    v_writelane_b32 v0, s11, 15
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 16
+; GCN-NEXT:    v_writelane_b32 v0, s5, 17
+; GCN-NEXT:    v_writelane_b32 v0, s6, 18
+; GCN-NEXT:    v_writelane_b32 v0, s7, 19
+; GCN-NEXT:    v_writelane_b32 v0, s8, 20
+; GCN-NEXT:    v_writelane_b32 v0, s9, 21
+; GCN-NEXT:    v_writelane_b32 v0, s10, 22
+; GCN-NEXT:    v_writelane_b32 v0, s11, 23
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 24
+; GCN-NEXT:    v_writelane_b32 v0, s5, 25
+; GCN-NEXT:    v_writelane_b32 v0, s6, 26
+; GCN-NEXT:    v_writelane_b32 v0, s7, 27
+; GCN-NEXT:    v_writelane_b32 v0, s8, 28
+; GCN-NEXT:    v_writelane_b32 v0, s9, 29
+; GCN-NEXT:    v_writelane_b32 v0, s10, 30
+; GCN-NEXT:    v_writelane_b32 v0, s11, 31
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[20:27]
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 32
+; GCN-NEXT:    v_writelane_b32 v0, s5, 33
+; GCN-NEXT:    v_writelane_b32 v0, s6, 34
+; GCN-NEXT:    v_writelane_b32 v0, s7, 35
+; GCN-NEXT:    v_writelane_b32 v0, s8, 36
+; GCN-NEXT:    v_writelane_b32 v0, s9, 37
+; GCN-NEXT:    v_writelane_b32 v0, s10, 38
+; GCN-NEXT:    v_writelane_b32 v0, s11, 39
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[36:43]
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 40
+; GCN-NEXT:    v_writelane_b32 v0, s5, 41
+; GCN-NEXT:    v_writelane_b32 v0, s6, 42
+; GCN-NEXT:    v_writelane_b32 v0, s7, 43
+; GCN-NEXT:    v_writelane_b32 v0, s8, 44
+; GCN-NEXT:    v_writelane_b32 v0, s9, 45
+; GCN-NEXT:    v_writelane_b32 v0, s10, 46
+; GCN-NEXT:    v_writelane_b32 v0, s11, 47
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[44:51]
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 48
+; GCN-NEXT:    v_writelane_b32 v0, s5, 49
+; GCN-NEXT:    v_writelane_b32 v0, s6, 50
+; GCN-NEXT:    v_writelane_b32 v0, s7, 51
+; GCN-NEXT:    v_writelane_b32 v0, s8, 52
+; GCN-NEXT:    v_writelane_b32 v0, s9, 53
+; GCN-NEXT:    v_writelane_b32 v0, s10, 54
+; GCN-NEXT:    v_writelane_b32 v0, s11, 55
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[52:59]
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 56
+; GCN-NEXT:    v_writelane_b32 v0, s5, 57
+; GCN-NEXT:    v_writelane_b32 v0, s6, 58
+; GCN-NEXT:    v_writelane_b32 v0, s7, 59
+; GCN-NEXT:    v_writelane_b32 v0, s8, 60
+; GCN-NEXT:    v_writelane_b32 v0, s9, 61
+; GCN-NEXT:    v_writelane_b32 v0, s10, 62
+; GCN-NEXT:    v_writelane_b32 v0, s11, 63
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 0
+; GCN-NEXT:    v_writelane_b32 v1, s5, 1
+; GCN-NEXT:    v_writelane_b32 v1, s6, 2
+; GCN-NEXT:    v_writelane_b32 v1, s7, 3
+; GCN-NEXT:    v_writelane_b32 v1, s8, 4
+; GCN-NEXT:    v_writelane_b32 v1, s9, 5
+; GCN-NEXT:    v_writelane_b32 v1, s10, 6
+; GCN-NEXT:    v_writelane_b32 v1, s11, 7
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[60:67]
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 8
+; GCN-NEXT:    v_writelane_b32 v1, s5, 9
+; GCN-NEXT:    v_writelane_b32 v1, s6, 10
+; GCN-NEXT:    v_writelane_b32 v1, s7, 11
+; GCN-NEXT:    v_writelane_b32 v1, s8, 12
+; GCN-NEXT:    v_writelane_b32 v1, s9, 13
+; GCN-NEXT:    v_writelane_b32 v1, s10, 14
+; GCN-NEXT:    v_writelane_b32 v1, s11, 15
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 16
+; GCN-NEXT:    v_writelane_b32 v1, s5, 17
+; GCN-NEXT:    v_writelane_b32 v1, s6, 18
+; GCN-NEXT:    v_writelane_b32 v1, s7, 19
+; GCN-NEXT:    v_writelane_b32 v1, s8, 20
+; GCN-NEXT:    v_writelane_b32 v1, s9, 21
+; GCN-NEXT:    v_writelane_b32 v1, s10, 22
+; GCN-NEXT:    v_writelane_b32 v1, s11, 23
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 24
+; GCN-NEXT:    v_writelane_b32 v1, s5, 25
+; GCN-NEXT:    v_writelane_b32 v1, s6, 26
+; GCN-NEXT:    v_writelane_b32 v1, s7, 27
+; GCN-NEXT:    v_writelane_b32 v1, s8, 28
+; GCN-NEXT:    v_writelane_b32 v1, s9, 29
+; GCN-NEXT:    v_writelane_b32 v1, s10, 30
+; GCN-NEXT:    v_writelane_b32 v1, s11, 31
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 32
+; GCN-NEXT:    v_writelane_b32 v1, s5, 33
+; GCN-NEXT:    v_writelane_b32 v1, s6, 34
+; GCN-NEXT:    v_writelane_b32 v1, s7, 35
+; GCN-NEXT:    v_writelane_b32 v1, s8, 36
+; GCN-NEXT:    v_writelane_b32 v1, s9, 37
+; GCN-NEXT:    v_writelane_b32 v1, s10, 38
+; GCN-NEXT:    v_writelane_b32 v1, s11, 39
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 40
+; GCN-NEXT:    v_writelane_b32 v1, s5, 41
+; GCN-NEXT:    v_writelane_b32 v1, s6, 42
+; GCN-NEXT:    v_writelane_b32 v1, s7, 43
+; GCN-NEXT:    v_writelane_b32 v1, s8, 44
+; GCN-NEXT:    v_writelane_b32 v1, s9, 45
+; GCN-NEXT:    v_writelane_b32 v1, s10, 46
+; GCN-NEXT:    v_writelane_b32 v1, s11, 47
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[68:75]
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 48
+; GCN-NEXT:    v_writelane_b32 v1, s5, 49
+; GCN-NEXT:    v_writelane_b32 v1, s6, 50
+; GCN-NEXT:    v_writelane_b32 v1, s7, 51
+; GCN-NEXT:    v_writelane_b32 v1, s8, 52
+; GCN-NEXT:    v_writelane_b32 v1, s9, 53
+; GCN-NEXT:    v_writelane_b32 v1, s10, 54
+; GCN-NEXT:    v_writelane_b32 v1, s11, 55
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[76:83]
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 56
+; GCN-NEXT:    v_writelane_b32 v1, s5, 57
+; GCN-NEXT:    v_writelane_b32 v1, s6, 58
+; GCN-NEXT:    v_writelane_b32 v1, s7, 59
+; GCN-NEXT:    v_writelane_b32 v1, s8, 60
+; GCN-NEXT:    v_writelane_b32 v1, s9, 61
+; GCN-NEXT:    v_writelane_b32 v1, s10, 62
+; GCN-NEXT:    v_writelane_b32 v1, s11, 63
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[84:91]
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v2, s4, 0
+; GCN-NEXT:    v_writelane_b32 v2, s5, 1
+; GCN-NEXT:    v_writelane_b32 v2, s6, 2
+; GCN-NEXT:    v_writelane_b32 v2, s7, 3
+; GCN-NEXT:    v_writelane_b32 v2, s8, 4
+; GCN-NEXT:    v_writelane_b32 v2, s9, 5
+; GCN-NEXT:    v_writelane_b32 v2, s10, 6
+; GCN-NEXT:    v_writelane_b32 v2, s11, 7
+; GCN-NEXT:    s_mov_b32 s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_writelane_b32 v0, s0, 0
-; GCN-NEXT:    v_writelane_b32 v0, s4, 1
-; GCN-NEXT:    v_writelane_b32 v0, s5, 2
-; GCN-NEXT:    v_writelane_b32 v0, s6, 3
-; GCN-NEXT:    v_writelane_b32 v0, s7, 4
-; GCN-NEXT:    v_writelane_b32 v0, s8, 5
-; GCN-NEXT:    v_writelane_b32 v0, s9, 6
-; GCN-NEXT:    v_writelane_b32 v0, s10, 7
-; GCN-NEXT:    v_writelane_b32 v0, s11, 8
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 9
-; GCN-NEXT:    v_writelane_b32 v0, s1, 10
-; GCN-NEXT:    v_writelane_b32 v0, s2, 11
-; GCN-NEXT:    v_writelane_b32 v0, s3, 12
-; GCN-NEXT:    v_writelane_b32 v0, s4, 13
-; GCN-NEXT:    v_writelane_b32 v0, s5, 14
-; GCN-NEXT:    v_writelane_b32 v0, s6, 15
-; GCN-NEXT:    v_writelane_b32 v0, s7, 16
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 17
-; GCN-NEXT:    v_writelane_b32 v0, s1, 18
-; GCN-NEXT:    v_writelane_b32 v0, s2, 19
-; GCN-NEXT:    v_writelane_b32 v0, s3, 20
-; GCN-NEXT:    v_writelane_b32 v0, s4, 21
-; GCN-NEXT:    v_writelane_b32 v0, s5, 22
-; GCN-NEXT:    v_writelane_b32 v0, s6, 23
-; GCN-NEXT:    v_writelane_b32 v0, s7, 24
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 25
-; GCN-NEXT:    v_writelane_b32 v0, s1, 26
-; GCN-NEXT:    v_writelane_b32 v0, s2, 27
-; GCN-NEXT:    v_writelane_b32 v0, s3, 28
-; GCN-NEXT:    v_writelane_b32 v0, s4, 29
-; GCN-NEXT:    v_writelane_b32 v0, s5, 30
-; GCN-NEXT:    v_writelane_b32 v0, s6, 31
-; GCN-NEXT:    v_writelane_b32 v0, s7, 32
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 33
-; GCN-NEXT:    v_writelane_b32 v0, s1, 34
-; GCN-NEXT:    v_writelane_b32 v0, s2, 35
-; GCN-NEXT:    v_writelane_b32 v0, s3, 36
-; GCN-NEXT:    v_writelane_b32 v0, s4, 37
-; GCN-NEXT:    v_writelane_b32 v0, s5, 38
-; GCN-NEXT:    v_writelane_b32 v0, s6, 39
-; GCN-NEXT:    v_writelane_b32 v0, s7, 40
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 41
-; GCN-NEXT:    v_writelane_b32 v0, s1, 42
-; GCN-NEXT:    v_writelane_b32 v0, s2, 43
-; GCN-NEXT:    v_writelane_b32 v0, s3, 44
-; GCN-NEXT:    v_writelane_b32 v0, s4, 45
-; GCN-NEXT:    v_writelane_b32 v0, s5, 46
-; GCN-NEXT:    v_writelane_b32 v0, s6, 47
-; GCN-NEXT:    v_writelane_b32 v0, s7, 48
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 49
-; GCN-NEXT:    v_writelane_b32 v0, s1, 50
-; GCN-NEXT:    v_writelane_b32 v0, s2, 51
-; GCN-NEXT:    v_writelane_b32 v0, s3, 52
-; GCN-NEXT:    v_writelane_b32 v0, s4, 53
-; GCN-NEXT:    v_writelane_b32 v0, s5, 54
-; GCN-NEXT:    v_writelane_b32 v0, s6, 55
-; GCN-NEXT:    v_writelane_b32 v0, s7, 56
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_mov_b32 s8, 0
-; GCN-NEXT:    v_readlane_b32 s9, v0, 0
-; GCN-NEXT:    s_cmp_lg_u32 s9, s8
-; GCN-NEXT:    v_writelane_b32 v0, s12, 57
-; GCN-NEXT:    v_writelane_b32 v0, s13, 58
-; GCN-NEXT:    v_writelane_b32 v0, s14, 59
-; GCN-NEXT:    v_writelane_b32 v0, s15, 60
-; GCN-NEXT:    v_writelane_b32 v0, s16, 61
-; GCN-NEXT:    v_writelane_b32 v0, s17, 62
-; GCN-NEXT:    v_writelane_b32 v0, s18, 63
-; GCN-NEXT:    v_writelane_b32 v1, s19, 0
-; GCN-NEXT:    v_writelane_b32 v1, s20, 1
-; GCN-NEXT:    v_writelane_b32 v1, s21, 2
-; GCN-NEXT:    v_writelane_b32 v1, s22, 3
-; GCN-NEXT:    v_writelane_b32 v1, s23, 4
-; GCN-NEXT:    v_writelane_b32 v1, s24, 5
-; GCN-NEXT:    v_writelane_b32 v1, s25, 6
-; GCN-NEXT:    v_writelane_b32 v1, s26, 7
-; GCN-NEXT:    v_writelane_b32 v1, s27, 8
-; GCN-NEXT:    v_writelane_b32 v1, s36, 9
-; GCN-NEXT:    v_writelane_b32 v1, s37, 10
-; GCN-NEXT:    v_writelane_b32 v1, s38, 11
-; GCN-NEXT:    v_writelane_b32 v1, s39, 12
-; GCN-NEXT:    v_writelane_b32 v1, s40, 13
-; GCN-NEXT:    v_writelane_b32 v1, s41, 14
-; GCN-NEXT:    v_writelane_b32 v1, s42, 15
-; GCN-NEXT:    v_writelane_b32 v1, s43, 16
-; GCN-NEXT:    v_writelane_b32 v1, s44, 17
-; GCN-NEXT:    v_writelane_b32 v1, s45, 18
-; GCN-NEXT:    v_writelane_b32 v1, s46, 19
-; GCN-NEXT:    v_writelane_b32 v1, s47, 20
-; GCN-NEXT:    v_writelane_b32 v1, s48, 21
-; GCN-NEXT:    v_writelane_b32 v1, s49, 22
-; GCN-NEXT:    v_writelane_b32 v1, s50, 23
-; GCN-NEXT:    v_writelane_b32 v1, s51, 24
-; GCN-NEXT:    v_writelane_b32 v1, s52, 25
-; GCN-NEXT:    v_writelane_b32 v1, s53, 26
-; GCN-NEXT:    v_writelane_b32 v1, s54, 27
-; GCN-NEXT:    v_writelane_b32 v1, s55, 28
-; GCN-NEXT:    v_writelane_b32 v1, s56, 29
-; GCN-NEXT:    v_writelane_b32 v1, s57, 30
-; GCN-NEXT:    v_writelane_b32 v1, s58, 31
-; GCN-NEXT:    v_writelane_b32 v1, s59, 32
-; GCN-NEXT:    v_writelane_b32 v1, s60, 33
-; GCN-NEXT:    v_writelane_b32 v1, s61, 34
-; GCN-NEXT:    v_writelane_b32 v1, s62, 35
-; GCN-NEXT:    v_writelane_b32 v1, s63, 36
-; GCN-NEXT:    v_writelane_b32 v1, s64, 37
-; GCN-NEXT:    v_writelane_b32 v1, s65, 38
-; GCN-NEXT:    v_writelane_b32 v1, s66, 39
-; GCN-NEXT:    v_writelane_b32 v1, s67, 40
-; GCN-NEXT:    v_writelane_b32 v1, s68, 41
-; GCN-NEXT:    v_writelane_b32 v1, s69, 42
-; GCN-NEXT:    v_writelane_b32 v1, s70, 43
-; GCN-NEXT:    v_writelane_b32 v1, s71, 44
-; GCN-NEXT:    v_writelane_b32 v1, s72, 45
-; GCN-NEXT:    v_writelane_b32 v1, s73, 46
-; GCN-NEXT:    v_writelane_b32 v1, s74, 47
-; GCN-NEXT:    v_writelane_b32 v1, s75, 48
-; GCN-NEXT:    v_writelane_b32 v1, s76, 49
-; GCN-NEXT:    v_writelane_b32 v1, s77, 50
-; GCN-NEXT:    v_writelane_b32 v1, s78, 51
-; GCN-NEXT:    v_writelane_b32 v1, s79, 52
-; GCN-NEXT:    v_writelane_b32 v1, s80, 53
-; GCN-NEXT:    v_writelane_b32 v1, s81, 54
-; GCN-NEXT:    v_writelane_b32 v1, s82, 55
-; GCN-NEXT:    v_writelane_b32 v1, s83, 56
-; GCN-NEXT:    v_writelane_b32 v1, s84, 57
-; GCN-NEXT:    v_writelane_b32 v1, s85, 58
-; GCN-NEXT:    v_writelane_b32 v1, s86, 59
-; GCN-NEXT:    v_writelane_b32 v1, s87, 60
-; GCN-NEXT:    v_writelane_b32 v1, s88, 61
-; GCN-NEXT:    v_writelane_b32 v1, s89, 62
-; GCN-NEXT:    v_writelane_b32 v1, s90, 63
-; GCN-NEXT:    v_writelane_b32 v2, s91, 0
-; GCN-NEXT:    v_writelane_b32 v2, s0, 1
-; GCN-NEXT:    v_writelane_b32 v2, s1, 2
-; GCN-NEXT:    v_writelane_b32 v2, s2, 3
-; GCN-NEXT:    v_writelane_b32 v2, s3, 4
-; GCN-NEXT:    v_writelane_b32 v2, s4, 5
-; GCN-NEXT:    v_writelane_b32 v2, s5, 6
-; GCN-NEXT:    v_writelane_b32 v2, s6, 7
-; GCN-NEXT:    v_writelane_b32 v2, s7, 8
+; GCN-NEXT:    s_cmp_lg_u32 s0, s1
 ; GCN-NEXT:    s_cbranch_scc1 BB0_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
-; GCN-NEXT:    v_readlane_b32 s0, v0, 1
-; GCN-NEXT:    v_readlane_b32 s1, v0, 2
-; GCN-NEXT:    v_readlane_b32 s2, v0, 3
-; GCN-NEXT:    v_readlane_b32 s3, v0, 4
-; GCN-NEXT:    v_readlane_b32 s4, v0, 5
-; GCN-NEXT:    v_readlane_b32 s5, v0, 6
-; GCN-NEXT:    v_readlane_b32 s6, v0, 7
-; GCN-NEXT:    v_readlane_b32 s7, v0, 8
+; GCN-NEXT:    v_readlane_b32 s8, v1, 56
+; GCN-NEXT:    v_readlane_b32 s9, v1, 57
+; GCN-NEXT:    v_readlane_b32 s10, v1, 58
+; GCN-NEXT:    v_readlane_b32 s11, v1, 59
+; GCN-NEXT:    v_readlane_b32 s12, v1, 60
+; GCN-NEXT:    v_readlane_b32 s13, v1, 61
+; GCN-NEXT:    v_readlane_b32 s14, v1, 62
+; GCN-NEXT:    v_readlane_b32 s15, v1, 63
+; GCN-NEXT:    v_readlane_b32 s16, v1, 48
+; GCN-NEXT:    v_readlane_b32 s17, v1, 49
+; GCN-NEXT:    v_readlane_b32 s18, v1, 50
+; GCN-NEXT:    v_readlane_b32 s19, v1, 51
+; GCN-NEXT:    v_readlane_b32 s20, v1, 52
+; GCN-NEXT:    v_readlane_b32 s21, v1, 53
+; GCN-NEXT:    v_readlane_b32 s22, v1, 54
+; GCN-NEXT:    v_readlane_b32 s23, v1, 55
+; GCN-NEXT:    v_readlane_b32 s24, v1, 40
+; GCN-NEXT:    v_readlane_b32 s25, v1, 41
+; GCN-NEXT:    v_readlane_b32 s26, v1, 42
+; GCN-NEXT:    v_readlane_b32 s27, v1, 43
+; GCN-NEXT:    v_readlane_b32 s28, v1, 44
+; GCN-NEXT:    v_readlane_b32 s29, v1, 45
+; GCN-NEXT:    v_readlane_b32 s30, v1, 46
+; GCN-NEXT:    v_readlane_b32 s31, v1, 47
+; GCN-NEXT:    v_readlane_b32 s36, v1, 32
+; GCN-NEXT:    v_readlane_b32 s37, v1, 33
+; GCN-NEXT:    v_readlane_b32 s38, v1, 34
+; GCN-NEXT:    v_readlane_b32 s39, v1, 35
+; GCN-NEXT:    v_readlane_b32 s40, v1, 36
+; GCN-NEXT:    v_readlane_b32 s41, v1, 37
+; GCN-NEXT:    v_readlane_b32 s42, v1, 38
+; GCN-NEXT:    v_readlane_b32 s43, v1, 39
+; GCN-NEXT:    v_readlane_b32 s44, v1, 24
+; GCN-NEXT:    v_readlane_b32 s45, v1, 25
+; GCN-NEXT:    v_readlane_b32 s46, v1, 26
+; GCN-NEXT:    v_readlane_b32 s47, v1, 27
+; GCN-NEXT:    v_readlane_b32 s48, v1, 28
+; GCN-NEXT:    v_readlane_b32 s49, v1, 29
+; GCN-NEXT:    v_readlane_b32 s50, v1, 30
+; GCN-NEXT:    v_readlane_b32 s51, v1, 31
+; GCN-NEXT:    v_readlane_b32 s52, v1, 16
+; GCN-NEXT:    v_readlane_b32 s53, v1, 17
+; GCN-NEXT:    v_readlane_b32 s54, v1, 18
+; GCN-NEXT:    v_readlane_b32 s55, v1, 19
+; GCN-NEXT:    v_readlane_b32 s56, v1, 20
+; GCN-NEXT:    v_readlane_b32 s57, v1, 21
+; GCN-NEXT:    v_readlane_b32 s58, v1, 22
+; GCN-NEXT:    v_readlane_b32 s59, v1, 23
+; GCN-NEXT:    v_readlane_b32 s60, v1, 8
+; GCN-NEXT:    v_readlane_b32 s61, v1, 9
+; GCN-NEXT:    v_readlane_b32 s62, v1, 10
+; GCN-NEXT:    v_readlane_b32 s63, v1, 11
+; GCN-NEXT:    v_readlane_b32 s64, v1, 12
+; GCN-NEXT:    v_readlane_b32 s65, v1, 13
+; GCN-NEXT:    v_readlane_b32 s66, v1, 14
+; GCN-NEXT:    v_readlane_b32 s67, v1, 15
+; GCN-NEXT:    v_readlane_b32 s68, v1, 0
+; GCN-NEXT:    v_readlane_b32 s69, v1, 1
+; GCN-NEXT:    v_readlane_b32 s70, v1, 2
+; GCN-NEXT:    v_readlane_b32 s71, v1, 3
+; GCN-NEXT:    v_readlane_b32 s72, v1, 4
+; GCN-NEXT:    v_readlane_b32 s73, v1, 5
+; GCN-NEXT:    v_readlane_b32 s74, v1, 6
+; GCN-NEXT:    v_readlane_b32 s75, v1, 7
+; GCN-NEXT:    v_readlane_b32 s76, v0, 56
+; GCN-NEXT:    v_readlane_b32 s77, v0, 57
+; GCN-NEXT:    v_readlane_b32 s78, v0, 58
+; GCN-NEXT:    v_readlane_b32 s79, v0, 59
+; GCN-NEXT:    v_readlane_b32 s80, v0, 60
+; GCN-NEXT:    v_readlane_b32 s81, v0, 61
+; GCN-NEXT:    v_readlane_b32 s82, v0, 62
+; GCN-NEXT:    v_readlane_b32 s83, v0, 63
+; GCN-NEXT:    v_readlane_b32 s84, v0, 48
+; GCN-NEXT:    v_readlane_b32 s85, v0, 49
+; GCN-NEXT:    v_readlane_b32 s86, v0, 50
+; GCN-NEXT:    v_readlane_b32 s87, v0, 51
+; GCN-NEXT:    v_readlane_b32 s88, v0, 52
+; GCN-NEXT:    v_readlane_b32 s89, v0, 53
+; GCN-NEXT:    v_readlane_b32 s90, v0, 54
+; GCN-NEXT:    v_readlane_b32 s91, v0, 55
+; GCN-NEXT:    v_readlane_b32 s0, v0, 0
+; GCN-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-NEXT:    v_readlane_b32 s2, v0, 2
+; GCN-NEXT:    v_readlane_b32 s3, v0, 3
+; GCN-NEXT:    v_readlane_b32 s4, v0, 4
+; GCN-NEXT:    v_readlane_b32 s5, v0, 5
+; GCN-NEXT:    v_readlane_b32 s6, v0, 6
+; GCN-NEXT:    v_readlane_b32 s7, v0, 7
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 57
-; GCN-NEXT:    v_readlane_b32 s1, v0, 58
-; GCN-NEXT:    v_readlane_b32 s2, v0, 59
-; GCN-NEXT:    v_readlane_b32 s3, v0, 60
-; GCN-NEXT:    v_readlane_b32 s4, v0, 61
-; GCN-NEXT:    v_readlane_b32 s5, v0, 62
-; GCN-NEXT:    v_readlane_b32 s6, v0, 63
-; GCN-NEXT:    v_readlane_b32 s7, v1, 0
+; GCN-NEXT:    v_readlane_b32 s0, v0, 8
+; GCN-NEXT:    v_readlane_b32 s1, v0, 9
+; GCN-NEXT:    v_readlane_b32 s2, v0, 10
+; GCN-NEXT:    v_readlane_b32 s3, v0, 11
+; GCN-NEXT:    v_readlane_b32 s4, v0, 12
+; GCN-NEXT:    v_readlane_b32 s5, v0, 13
+; GCN-NEXT:    v_readlane_b32 s6, v0, 14
+; GCN-NEXT:    v_readlane_b32 s7, v0, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 1
-; GCN-NEXT:    v_readlane_b32 s1, v1, 2
-; GCN-NEXT:    v_readlane_b32 s2, v1, 3
-; GCN-NEXT:    v_readlane_b32 s3, v1, 4
-; GCN-NEXT:    v_readlane_b32 s4, v1, 5
-; GCN-NEXT:    v_readlane_b32 s5, v1, 6
-; GCN-NEXT:    v_readlane_b32 s6, v1, 7
-; GCN-NEXT:    v_readlane_b32 s7, v1, 8
+; GCN-NEXT:    v_readlane_b32 s0, v0, 16
+; GCN-NEXT:    v_readlane_b32 s1, v0, 17
+; GCN-NEXT:    v_readlane_b32 s2, v0, 18
+; GCN-NEXT:    v_readlane_b32 s3, v0, 19
+; GCN-NEXT:    v_readlane_b32 s4, v0, 20
+; GCN-NEXT:    v_readlane_b32 s5, v0, 21
+; GCN-NEXT:    v_readlane_b32 s6, v0, 22
+; GCN-NEXT:    v_readlane_b32 s7, v0, 23
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 9
-; GCN-NEXT:    v_readlane_b32 s1, v1, 10
-; GCN-NEXT:    v_readlane_b32 s2, v1, 11
-; GCN-NEXT:    v_readlane_b32 s3, v1, 12
-; GCN-NEXT:    v_readlane_b32 s4, v1, 13
-; GCN-NEXT:    v_readlane_b32 s5, v1, 14
-; GCN-NEXT:    v_readlane_b32 s6, v1, 15
-; GCN-NEXT:    v_readlane_b32 s7, v1, 16
+; GCN-NEXT:    v_readlane_b32 s0, v0, 24
+; GCN-NEXT:    v_readlane_b32 s1, v0, 25
+; GCN-NEXT:    v_readlane_b32 s2, v0, 26
+; GCN-NEXT:    v_readlane_b32 s3, v0, 27
+; GCN-NEXT:    v_readlane_b32 s4, v0, 28
+; GCN-NEXT:    v_readlane_b32 s5, v0, 29
+; GCN-NEXT:    v_readlane_b32 s6, v0, 30
+; GCN-NEXT:    v_readlane_b32 s7, v0, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 17
-; GCN-NEXT:    v_readlane_b32 s1, v1, 18
-; GCN-NEXT:    v_readlane_b32 s2, v1, 19
-; GCN-NEXT:    v_readlane_b32 s3, v1, 20
-; GCN-NEXT:    v_readlane_b32 s4, v1, 21
-; GCN-NEXT:    v_readlane_b32 s5, v1, 22
-; GCN-NEXT:    v_readlane_b32 s6, v1, 23
-; GCN-NEXT:    v_readlane_b32 s7, v1, 24
+; GCN-NEXT:    v_readlane_b32 s0, v0, 32
+; GCN-NEXT:    v_readlane_b32 s1, v0, 33
+; GCN-NEXT:    v_readlane_b32 s2, v0, 34
+; GCN-NEXT:    v_readlane_b32 s3, v0, 35
+; GCN-NEXT:    v_readlane_b32 s4, v0, 36
+; GCN-NEXT:    v_readlane_b32 s5, v0, 37
+; GCN-NEXT:    v_readlane_b32 s6, v0, 38
+; GCN-NEXT:    v_readlane_b32 s7, v0, 39
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 25
-; GCN-NEXT:    v_readlane_b32 s1, v1, 26
-; GCN-NEXT:    v_readlane_b32 s2, v1, 27
-; GCN-NEXT:    v_readlane_b32 s3, v1, 28
-; GCN-NEXT:    v_readlane_b32 s4, v1, 29
-; GCN-NEXT:    v_readlane_b32 s5, v1, 30
-; GCN-NEXT:    v_readlane_b32 s6, v1, 31
-; GCN-NEXT:    v_readlane_b32 s7, v1, 32
+; GCN-NEXT:    v_readlane_b32 s0, v0, 40
+; GCN-NEXT:    v_readlane_b32 s1, v0, 41
+; GCN-NEXT:    v_readlane_b32 s2, v0, 42
+; GCN-NEXT:    v_readlane_b32 s3, v0, 43
+; GCN-NEXT:    v_readlane_b32 s4, v0, 44
+; GCN-NEXT:    v_readlane_b32 s5, v0, 45
+; GCN-NEXT:    v_readlane_b32 s6, v0, 46
+; GCN-NEXT:    v_readlane_b32 s7, v0, 47
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 33
-; GCN-NEXT:    v_readlane_b32 s1, v1, 34
-; GCN-NEXT:    v_readlane_b32 s2, v1, 35
-; GCN-NEXT:    v_readlane_b32 s3, v1, 36
-; GCN-NEXT:    v_readlane_b32 s4, v1, 37
-; GCN-NEXT:    v_readlane_b32 s5, v1, 38
-; GCN-NEXT:    v_readlane_b32 s6, v1, 39
-; GCN-NEXT:    v_readlane_b32 s7, v1, 40
+; GCN-NEXT:    v_readlane_b32 s0, v2, 0
+; GCN-NEXT:    v_readlane_b32 s1, v2, 1
+; GCN-NEXT:    v_readlane_b32 s2, v2, 2
+; GCN-NEXT:    v_readlane_b32 s3, v2, 3
+; GCN-NEXT:    v_readlane_b32 s4, v2, 4
+; GCN-NEXT:    v_readlane_b32 s5, v2, 5
+; GCN-NEXT:    v_readlane_b32 s6, v2, 6
+; GCN-NEXT:    v_readlane_b32 s7, v2, 7
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[84:91]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 41
-; GCN-NEXT:    v_readlane_b32 s1, v1, 42
-; GCN-NEXT:    v_readlane_b32 s2, v1, 43
-; GCN-NEXT:    v_readlane_b32 s3, v1, 44
-; GCN-NEXT:    v_readlane_b32 s4, v1, 45
-; GCN-NEXT:    v_readlane_b32 s5, v1, 46
-; GCN-NEXT:    v_readlane_b32 s6, v1, 47
-; GCN-NEXT:    v_readlane_b32 s7, v1, 48
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[76:83]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 49
-; GCN-NEXT:    v_readlane_b32 s1, v1, 50
-; GCN-NEXT:    v_readlane_b32 s2, v1, 51
-; GCN-NEXT:    v_readlane_b32 s3, v1, 52
-; GCN-NEXT:    v_readlane_b32 s4, v1, 53
-; GCN-NEXT:    v_readlane_b32 s5, v1, 54
-; GCN-NEXT:    v_readlane_b32 s6, v1, 55
-; GCN-NEXT:    v_readlane_b32 s7, v1, 56
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[68:75]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 57
-; GCN-NEXT:    v_readlane_b32 s1, v1, 58
-; GCN-NEXT:    v_readlane_b32 s2, v1, 59
-; GCN-NEXT:    v_readlane_b32 s3, v1, 60
-; GCN-NEXT:    v_readlane_b32 s4, v1, 61
-; GCN-NEXT:    v_readlane_b32 s5, v1, 62
-; GCN-NEXT:    v_readlane_b32 s6, v1, 63
-; GCN-NEXT:    v_readlane_b32 s7, v2, 0
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[60:67]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 9
-; GCN-NEXT:    v_readlane_b32 s1, v0, 10
-; GCN-NEXT:    v_readlane_b32 s2, v0, 11
-; GCN-NEXT:    v_readlane_b32 s3, v0, 12
-; GCN-NEXT:    v_readlane_b32 s4, v0, 13
-; GCN-NEXT:    v_readlane_b32 s5, v0, 14
-; GCN-NEXT:    v_readlane_b32 s6, v0, 15
-; GCN-NEXT:    v_readlane_b32 s7, v0, 16
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[52:59]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 17
-; GCN-NEXT:    v_readlane_b32 s1, v0, 18
-; GCN-NEXT:    v_readlane_b32 s2, v0, 19
-; GCN-NEXT:    v_readlane_b32 s3, v0, 20
-; GCN-NEXT:    v_readlane_b32 s4, v0, 21
-; GCN-NEXT:    v_readlane_b32 s5, v0, 22
-; GCN-NEXT:    v_readlane_b32 s6, v0, 23
-; GCN-NEXT:    v_readlane_b32 s7, v0, 24
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[44:51]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 25
-; GCN-NEXT:    v_readlane_b32 s1, v0, 26
-; GCN-NEXT:    v_readlane_b32 s2, v0, 27
-; GCN-NEXT:    v_readlane_b32 s3, v0, 28
-; GCN-NEXT:    v_readlane_b32 s4, v0, 29
-; GCN-NEXT:    v_readlane_b32 s5, v0, 30
-; GCN-NEXT:    v_readlane_b32 s6, v0, 31
-; GCN-NEXT:    v_readlane_b32 s7, v0, 32
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[36:43]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 33
-; GCN-NEXT:    v_readlane_b32 s1, v0, 34
-; GCN-NEXT:    v_readlane_b32 s2, v0, 35
-; GCN-NEXT:    v_readlane_b32 s3, v0, 36
-; GCN-NEXT:    v_readlane_b32 s4, v0, 37
-; GCN-NEXT:    v_readlane_b32 s5, v0, 38
-; GCN-NEXT:    v_readlane_b32 s6, v0, 39
-; GCN-NEXT:    v_readlane_b32 s7, v0, 40
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[24:31]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 41
-; GCN-NEXT:    v_readlane_b32 s1, v0, 42
-; GCN-NEXT:    v_readlane_b32 s2, v0, 43
-; GCN-NEXT:    v_readlane_b32 s3, v0, 44
-; GCN-NEXT:    v_readlane_b32 s4, v0, 45
-; GCN-NEXT:    v_readlane_b32 s5, v0, 46
-; GCN-NEXT:    v_readlane_b32 s6, v0, 47
-; GCN-NEXT:    v_readlane_b32 s7, v0, 48
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[16:23]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 49
-; GCN-NEXT:    v_readlane_b32 s1, v0, 50
-; GCN-NEXT:    v_readlane_b32 s2, v0, 51
-; GCN-NEXT:    v_readlane_b32 s3, v0, 52
-; GCN-NEXT:    v_readlane_b32 s4, v0, 53
-; GCN-NEXT:    v_readlane_b32 s5, v0, 54
-; GCN-NEXT:    v_readlane_b32 s6, v0, 55
-; GCN-NEXT:    v_readlane_b32 s7, v0, 56
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[8:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v2, 1
-; GCN-NEXT:    v_readlane_b32 s1, v2, 2
-; GCN-NEXT:    v_readlane_b32 s2, v2, 3
-; GCN-NEXT:    v_readlane_b32 s3, v2, 4
-; GCN-NEXT:    v_readlane_b32 s4, v2, 5
-; GCN-NEXT:    v_readlane_b32 s5, v2, 6
-; GCN-NEXT:    v_readlane_b32 s6, v2, 7
-; GCN-NEXT:    v_readlane_b32 s7, v2, 8
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:7]
 ; GCN-NEXT:    ;;#ASMEND
@@ -448,191 +446,189 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 0
+; GCN-NEXT:    v_writelane_b32 v0, s5, 1
+; GCN-NEXT:    v_writelane_b32 v0, s6, 2
+; GCN-NEXT:    v_writelane_b32 v0, s7, 3
+; GCN-NEXT:    v_writelane_b32 v0, s8, 4
+; GCN-NEXT:    v_writelane_b32 v0, s9, 5
+; GCN-NEXT:    v_writelane_b32 v0, s10, 6
+; GCN-NEXT:    v_writelane_b32 v0, s11, 7
+; GCN-NEXT:    v_writelane_b32 v0, s12, 8
+; GCN-NEXT:    v_writelane_b32 v0, s13, 9
+; GCN-NEXT:    v_writelane_b32 v0, s14, 10
+; GCN-NEXT:    v_writelane_b32 v0, s15, 11
+; GCN-NEXT:    v_writelane_b32 v0, s16, 12
+; GCN-NEXT:    v_writelane_b32 v0, s17, 13
+; GCN-NEXT:    v_writelane_b32 v0, s18, 14
+; GCN-NEXT:    v_writelane_b32 v0, s19, 15
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:19]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 16
+; GCN-NEXT:    v_writelane_b32 v0, s5, 17
+; GCN-NEXT:    v_writelane_b32 v0, s6, 18
+; GCN-NEXT:    v_writelane_b32 v0, s7, 19
+; GCN-NEXT:    v_writelane_b32 v0, s8, 20
+; GCN-NEXT:    v_writelane_b32 v0, s9, 21
+; GCN-NEXT:    v_writelane_b32 v0, s10, 22
+; GCN-NEXT:    v_writelane_b32 v0, s11, 23
+; GCN-NEXT:    v_writelane_b32 v0, s12, 24
+; GCN-NEXT:    v_writelane_b32 v0, s13, 25
+; GCN-NEXT:    v_writelane_b32 v0, s14, 26
+; GCN-NEXT:    v_writelane_b32 v0, s15, 27
+; GCN-NEXT:    v_writelane_b32 v0, s16, 28
+; GCN-NEXT:    v_writelane_b32 v0, s17, 29
+; GCN-NEXT:    v_writelane_b32 v0, s18, 30
+; GCN-NEXT:    v_writelane_b32 v0, s19, 31
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:19]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 32
+; GCN-NEXT:    v_writelane_b32 v0, s5, 33
+; GCN-NEXT:    v_writelane_b32 v0, s6, 34
+; GCN-NEXT:    v_writelane_b32 v0, s7, 35
+; GCN-NEXT:    v_writelane_b32 v0, s8, 36
+; GCN-NEXT:    v_writelane_b32 v0, s9, 37
+; GCN-NEXT:    v_writelane_b32 v0, s10, 38
+; GCN-NEXT:    v_writelane_b32 v0, s11, 39
+; GCN-NEXT:    v_writelane_b32 v0, s12, 40
+; GCN-NEXT:    v_writelane_b32 v0, s13, 41
+; GCN-NEXT:    v_writelane_b32 v0, s14, 42
+; GCN-NEXT:    v_writelane_b32 v0, s15, 43
+; GCN-NEXT:    v_writelane_b32 v0, s16, 44
+; GCN-NEXT:    v_writelane_b32 v0, s17, 45
+; GCN-NEXT:    v_writelane_b32 v0, s18, 46
+; GCN-NEXT:    v_writelane_b32 v0, s19, 47
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[36:51]
+; GCN-NEXT:    ; def s[4:19]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s4, 48
+; GCN-NEXT:    v_writelane_b32 v0, s5, 49
+; GCN-NEXT:    v_writelane_b32 v0, s6, 50
+; GCN-NEXT:    v_writelane_b32 v0, s7, 51
+; GCN-NEXT:    v_writelane_b32 v0, s8, 52
+; GCN-NEXT:    v_writelane_b32 v0, s9, 53
+; GCN-NEXT:    v_writelane_b32 v0, s10, 54
+; GCN-NEXT:    v_writelane_b32 v0, s11, 55
+; GCN-NEXT:    v_writelane_b32 v0, s12, 56
+; GCN-NEXT:    v_writelane_b32 v0, s13, 57
+; GCN-NEXT:    v_writelane_b32 v0, s14, 58
+; GCN-NEXT:    v_writelane_b32 v0, s15, 59
+; GCN-NEXT:    v_writelane_b32 v0, s16, 60
+; GCN-NEXT:    v_writelane_b32 v0, s17, 61
+; GCN-NEXT:    v_writelane_b32 v0, s18, 62
+; GCN-NEXT:    v_writelane_b32 v0, s19, 63
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[4:11]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s4, 0
+; GCN-NEXT:    v_writelane_b32 v1, s5, 1
+; GCN-NEXT:    v_writelane_b32 v1, s6, 2
+; GCN-NEXT:    v_writelane_b32 v1, s7, 3
+; GCN-NEXT:    v_writelane_b32 v1, s8, 4
+; GCN-NEXT:    v_writelane_b32 v1, s9, 5
+; GCN-NEXT:    v_writelane_b32 v1, s10, 6
+; GCN-NEXT:    v_writelane_b32 v1, s11, 7
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[2:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v1, s2, 8
+; GCN-NEXT:    v_writelane_b32 v1, s3, 9
+; GCN-NEXT:    s_mov_b32 s1, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_writelane_b32 v0, s0, 0
-; GCN-NEXT:    v_writelane_b32 v0, s4, 1
-; GCN-NEXT:    v_writelane_b32 v0, s5, 2
-; GCN-NEXT:    v_writelane_b32 v0, s6, 3
-; GCN-NEXT:    v_writelane_b32 v0, s7, 4
-; GCN-NEXT:    v_writelane_b32 v0, s8, 5
-; GCN-NEXT:    v_writelane_b32 v0, s9, 6
-; GCN-NEXT:    v_writelane_b32 v0, s10, 7
-; GCN-NEXT:    v_writelane_b32 v0, s11, 8
-; GCN-NEXT:    v_writelane_b32 v0, s12, 9
-; GCN-NEXT:    v_writelane_b32 v0, s13, 10
-; GCN-NEXT:    v_writelane_b32 v0, s14, 11
-; GCN-NEXT:    v_writelane_b32 v0, s15, 12
-; GCN-NEXT:    v_writelane_b32 v0, s16, 13
-; GCN-NEXT:    v_writelane_b32 v0, s17, 14
-; GCN-NEXT:    v_writelane_b32 v0, s18, 15
-; GCN-NEXT:    v_writelane_b32 v0, s19, 16
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:15]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[16:31]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_writelane_b32 v0, s0, 17
-; GCN-NEXT:    v_writelane_b32 v0, s1, 18
-; GCN-NEXT:    v_writelane_b32 v0, s2, 19
-; GCN-NEXT:    v_writelane_b32 v0, s3, 20
-; GCN-NEXT:    v_writelane_b32 v0, s4, 21
-; GCN-NEXT:    v_writelane_b32 v0, s5, 22
-; GCN-NEXT:    v_writelane_b32 v0, s6, 23
-; GCN-NEXT:    v_writelane_b32 v0, s7, 24
-; GCN-NEXT:    v_writelane_b32 v0, s8, 25
-; GCN-NEXT:    v_writelane_b32 v0, s9, 26
-; GCN-NEXT:    v_writelane_b32 v0, s10, 27
-; GCN-NEXT:    v_writelane_b32 v0, s11, 28
-; GCN-NEXT:    v_writelane_b32 v0, s12, 29
-; GCN-NEXT:    v_writelane_b32 v0, s13, 30
-; GCN-NEXT:    v_writelane_b32 v0, s14, 31
-; GCN-NEXT:    v_writelane_b32 v0, s15, 32
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:7]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[8:9]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_mov_b32 s10, 0
-; GCN-NEXT:    v_readlane_b32 s11, v0, 0
-; GCN-NEXT:    s_cmp_lg_u32 s11, s10
-; GCN-NEXT:    v_writelane_b32 v0, s36, 33
-; GCN-NEXT:    v_writelane_b32 v0, s37, 34
-; GCN-NEXT:    v_writelane_b32 v0, s38, 35
-; GCN-NEXT:    v_writelane_b32 v0, s39, 36
-; GCN-NEXT:    v_writelane_b32 v0, s40, 37
-; GCN-NEXT:    v_writelane_b32 v0, s41, 38
-; GCN-NEXT:    v_writelane_b32 v0, s42, 39
-; GCN-NEXT:    v_writelane_b32 v0, s43, 40
-; GCN-NEXT:    v_writelane_b32 v0, s44, 41
-; GCN-NEXT:    v_writelane_b32 v0, s45, 42
-; GCN-NEXT:    v_writelane_b32 v0, s46, 43
-; GCN-NEXT:    v_writelane_b32 v0, s47, 44
-; GCN-NEXT:    v_writelane_b32 v0, s48, 45
-; GCN-NEXT:    v_writelane_b32 v0, s49, 46
-; GCN-NEXT:    v_writelane_b32 v0, s50, 47
-; GCN-NEXT:    v_writelane_b32 v0, s51, 48
-; GCN-NEXT:    v_writelane_b32 v0, s16, 49
-; GCN-NEXT:    v_writelane_b32 v0, s17, 50
-; GCN-NEXT:    v_writelane_b32 v0, s18, 51
-; GCN-NEXT:    v_writelane_b32 v0, s19, 52
-; GCN-NEXT:    v_writelane_b32 v0, s20, 53
-; GCN-NEXT:    v_writelane_b32 v0, s21, 54
-; GCN-NEXT:    v_writelane_b32 v0, s22, 55
-; GCN-NEXT:    v_writelane_b32 v0, s23, 56
-; GCN-NEXT:    v_writelane_b32 v0, s24, 57
-; GCN-NEXT:    v_writelane_b32 v0, s25, 58
-; GCN-NEXT:    v_writelane_b32 v0, s26, 59
-; GCN-NEXT:    v_writelane_b32 v0, s27, 60
-; GCN-NEXT:    v_writelane_b32 v0, s28, 61
-; GCN-NEXT:    v_writelane_b32 v0, s29, 62
-; GCN-NEXT:    v_writelane_b32 v0, s30, 63
-; GCN-NEXT:    v_writelane_b32 v1, s31, 0
-; GCN-NEXT:    v_writelane_b32 v1, s0, 1
-; GCN-NEXT:    v_writelane_b32 v1, s1, 2
-; GCN-NEXT:    v_writelane_b32 v1, s2, 3
-; GCN-NEXT:    v_writelane_b32 v1, s3, 4
-; GCN-NEXT:    v_writelane_b32 v1, s4, 5
-; GCN-NEXT:    v_writelane_b32 v1, s5, 6
-; GCN-NEXT:    v_writelane_b32 v1, s6, 7
-; GCN-NEXT:    v_writelane_b32 v1, s7, 8
-; GCN-NEXT:    v_writelane_b32 v1, s8, 9
-; GCN-NEXT:    v_writelane_b32 v1, s9, 10
+; GCN-NEXT:    s_cmp_lg_u32 s0, s1
 ; GCN-NEXT:    s_cbranch_scc1 BB1_2
 ; GCN-NEXT:  ; %bb.1: ; %bb0
-; GCN-NEXT:    v_readlane_b32 s0, v0, 1
-; GCN-NEXT:    v_readlane_b32 s1, v0, 2
-; GCN-NEXT:    v_readlane_b32 s2, v0, 3
-; GCN-NEXT:    v_readlane_b32 s3, v0, 4
-; GCN-NEXT:    v_readlane_b32 s4, v0, 5
-; GCN-NEXT:    v_readlane_b32 s5, v0, 6
-; GCN-NEXT:    v_readlane_b32 s6, v0, 7
-; GCN-NEXT:    v_readlane_b32 s7, v0, 8
-; GCN-NEXT:    v_readlane_b32 s8, v0, 9
-; GCN-NEXT:    v_readlane_b32 s9, v0, 10
-; GCN-NEXT:    v_readlane_b32 s10, v0, 11
-; GCN-NEXT:    v_readlane_b32 s11, v0, 12
-; GCN-NEXT:    v_readlane_b32 s12, v0, 13
-; GCN-NEXT:    v_readlane_b32 s13, v0, 14
-; GCN-NEXT:    v_readlane_b32 s14, v0, 15
-; GCN-NEXT:    v_readlane_b32 s15, v0, 16
+; GCN-NEXT:    v_readlane_b32 s16, v1, 8
+; GCN-NEXT:    v_readlane_b32 s17, v1, 9
+; GCN-NEXT:    v_readlane_b32 s20, v1, 0
+; GCN-NEXT:    v_readlane_b32 s21, v1, 1
+; GCN-NEXT:    v_readlane_b32 s22, v1, 2
+; GCN-NEXT:    v_readlane_b32 s23, v1, 3
+; GCN-NEXT:    v_readlane_b32 s24, v1, 4
+; GCN-NEXT:    v_readlane_b32 s25, v1, 5
+; GCN-NEXT:    v_readlane_b32 s26, v1, 6
+; GCN-NEXT:    v_readlane_b32 s27, v1, 7
+; GCN-NEXT:    v_readlane_b32 s36, v0, 32
+; GCN-NEXT:    v_readlane_b32 s37, v0, 33
+; GCN-NEXT:    v_readlane_b32 s38, v0, 34
+; GCN-NEXT:    v_readlane_b32 s39, v0, 35
+; GCN-NEXT:    v_readlane_b32 s40, v0, 36
+; GCN-NEXT:    v_readlane_b32 s41, v0, 37
+; GCN-NEXT:    v_readlane_b32 s42, v0, 38
+; GCN-NEXT:    v_readlane_b32 s43, v0, 39
+; GCN-NEXT:    v_readlane_b32 s44, v0, 40
+; GCN-NEXT:    v_readlane_b32 s45, v0, 41
+; GCN-NEXT:    v_readlane_b32 s46, v0, 42
+; GCN-NEXT:    v_readlane_b32 s47, v0, 43
+; GCN-NEXT:    v_readlane_b32 s48, v0, 44
+; GCN-NEXT:    v_readlane_b32 s49, v0, 45
+; GCN-NEXT:    v_readlane_b32 s50, v0, 46
+; GCN-NEXT:    v_readlane_b32 s51, v0, 47
+; GCN-NEXT:    v_readlane_b32 s0, v0, 0
+; GCN-NEXT:    v_readlane_b32 s1, v0, 1
+; GCN-NEXT:    v_readlane_b32 s2, v0, 2
+; GCN-NEXT:    v_readlane_b32 s3, v0, 3
+; GCN-NEXT:    v_readlane_b32 s4, v0, 4
+; GCN-NEXT:    v_readlane_b32 s5, v0, 5
+; GCN-NEXT:    v_readlane_b32 s6, v0, 6
+; GCN-NEXT:    v_readlane_b32 s7, v0, 7
+; GCN-NEXT:    v_readlane_b32 s8, v0, 8
+; GCN-NEXT:    v_readlane_b32 s9, v0, 9
+; GCN-NEXT:    v_readlane_b32 s10, v0, 10
+; GCN-NEXT:    v_readlane_b32 s11, v0, 11
+; GCN-NEXT:    v_readlane_b32 s12, v0, 12
+; GCN-NEXT:    v_readlane_b32 s13, v0, 13
+; GCN-NEXT:    v_readlane_b32 s14, v0, 14
+; GCN-NEXT:    v_readlane_b32 s15, v0, 15
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 33
-; GCN-NEXT:    v_readlane_b32 s1, v0, 34
-; GCN-NEXT:    v_readlane_b32 s2, v0, 35
-; GCN-NEXT:    v_readlane_b32 s3, v0, 36
-; GCN-NEXT:    v_readlane_b32 s4, v0, 37
-; GCN-NEXT:    v_readlane_b32 s5, v0, 38
-; GCN-NEXT:    v_readlane_b32 s6, v0, 39
-; GCN-NEXT:    v_readlane_b32 s7, v0, 40
-; GCN-NEXT:    v_readlane_b32 s8, v0, 41
-; GCN-NEXT:    v_readlane_b32 s9, v0, 42
-; GCN-NEXT:    v_readlane_b32 s10, v0, 43
-; GCN-NEXT:    v_readlane_b32 s11, v0, 44
-; GCN-NEXT:    v_readlane_b32 s12, v0, 45
-; GCN-NEXT:    v_readlane_b32 s13, v0, 46
-; GCN-NEXT:    v_readlane_b32 s14, v0, 47
-; GCN-NEXT:    v_readlane_b32 s15, v0, 48
+; GCN-NEXT:    v_readlane_b32 s0, v0, 16
+; GCN-NEXT:    v_readlane_b32 s1, v0, 17
+; GCN-NEXT:    v_readlane_b32 s2, v0, 18
+; GCN-NEXT:    v_readlane_b32 s3, v0, 19
+; GCN-NEXT:    v_readlane_b32 s4, v0, 20
+; GCN-NEXT:    v_readlane_b32 s5, v0, 21
+; GCN-NEXT:    v_readlane_b32 s6, v0, 22
+; GCN-NEXT:    v_readlane_b32 s7, v0, 23
+; GCN-NEXT:    v_readlane_b32 s8, v0, 24
+; GCN-NEXT:    v_readlane_b32 s9, v0, 25
+; GCN-NEXT:    v_readlane_b32 s10, v0, 26
+; GCN-NEXT:    v_readlane_b32 s11, v0, 27
+; GCN-NEXT:    v_readlane_b32 s12, v0, 28
+; GCN-NEXT:    v_readlane_b32 s13, v0, 29
+; GCN-NEXT:    v_readlane_b32 s14, v0, 30
+; GCN-NEXT:    v_readlane_b32 s15, v0, 31
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 17
-; GCN-NEXT:    v_readlane_b32 s1, v0, 18
-; GCN-NEXT:    v_readlane_b32 s2, v0, 19
-; GCN-NEXT:    v_readlane_b32 s3, v0, 20
-; GCN-NEXT:    v_readlane_b32 s4, v0, 21
-; GCN-NEXT:    v_readlane_b32 s5, v0, 22
-; GCN-NEXT:    v_readlane_b32 s6, v0, 23
-; GCN-NEXT:    v_readlane_b32 s7, v0, 24
-; GCN-NEXT:    v_readlane_b32 s8, v0, 25
-; GCN-NEXT:    v_readlane_b32 s9, v0, 26
-; GCN-NEXT:    v_readlane_b32 s10, v0, 27
-; GCN-NEXT:    v_readlane_b32 s11, v0, 28
-; GCN-NEXT:    v_readlane_b32 s12, v0, 29
-; GCN-NEXT:    v_readlane_b32 s13, v0, 30
-; GCN-NEXT:    v_readlane_b32 s14, v0, 31
-; GCN-NEXT:    v_readlane_b32 s15, v0, 32
+; GCN-NEXT:    v_readlane_b32 s0, v0, 48
+; GCN-NEXT:    v_readlane_b32 s1, v0, 49
+; GCN-NEXT:    v_readlane_b32 s2, v0, 50
+; GCN-NEXT:    v_readlane_b32 s3, v0, 51
+; GCN-NEXT:    v_readlane_b32 s4, v0, 52
+; GCN-NEXT:    v_readlane_b32 s5, v0, 53
+; GCN-NEXT:    v_readlane_b32 s6, v0, 54
+; GCN-NEXT:    v_readlane_b32 s7, v0, 55
+; GCN-NEXT:    v_readlane_b32 s8, v0, 56
+; GCN-NEXT:    v_readlane_b32 s9, v0, 57
+; GCN-NEXT:    v_readlane_b32 s10, v0, 58
+; GCN-NEXT:    v_readlane_b32 s11, v0, 59
+; GCN-NEXT:    v_readlane_b32 s12, v0, 60
+; GCN-NEXT:    v_readlane_b32 s13, v0, 61
+; GCN-NEXT:    v_readlane_b32 s14, v0, 62
+; GCN-NEXT:    v_readlane_b32 s15, v0, 63
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:15]
+; GCN-NEXT:    ; use s[36:51]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 1
-; GCN-NEXT:    v_readlane_b32 s1, v1, 2
-; GCN-NEXT:    v_readlane_b32 s2, v1, 3
-; GCN-NEXT:    v_readlane_b32 s3, v1, 4
-; GCN-NEXT:    v_readlane_b32 s4, v1, 5
-; GCN-NEXT:    v_readlane_b32 s5, v1, 6
-; GCN-NEXT:    v_readlane_b32 s6, v1, 7
-; GCN-NEXT:    v_readlane_b32 s7, v1, 8
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:7]
+; GCN-NEXT:    ; use s[20:27]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v1, 9
-; GCN-NEXT:    v_readlane_b32 s1, v1, 10
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:1]
+; GCN-NEXT:    ; use s[16:17]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v0, 49
-; GCN-NEXT:    v_readlane_b32 s1, v0, 50
-; GCN-NEXT:    v_readlane_b32 s2, v0, 51
-; GCN-NEXT:    v_readlane_b32 s3, v0, 52
-; GCN-NEXT:    v_readlane_b32 s4, v0, 53
-; GCN-NEXT:    v_readlane_b32 s5, v0, 54
-; GCN-NEXT:    v_readlane_b32 s6, v0, 55
-; GCN-NEXT:    v_readlane_b32 s7, v0, 56
-; GCN-NEXT:    v_readlane_b32 s8, v0, 57
-; GCN-NEXT:    v_readlane_b32 s9, v0, 58
-; GCN-NEXT:    v_readlane_b32 s10, v0, 59
-; GCN-NEXT:    v_readlane_b32 s11, v0, 60
-; GCN-NEXT:    v_readlane_b32 s12, v0, 61
-; GCN-NEXT:    v_readlane_b32 s13, v0, 62
-; GCN-NEXT:    v_readlane_b32 s14, v0, 63
-; GCN-NEXT:    v_readlane_b32 s15, v1, 0
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
@@ -667,12 +663,12 @@ ret:
 define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 {
 ; GCN-LABEL: no_vgprs_last_sgpr_spill:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s56, SCRATCH_RSRC_DWORD0
-; GCN-NEXT:    s_mov_b32 s57, SCRATCH_RSRC_DWORD1
-; GCN-NEXT:    s_mov_b32 s58, -1
-; GCN-NEXT:    s_mov_b32 s59, 0xe8f000
-; GCN-NEXT:    s_add_u32 s56, s56, s3
-; GCN-NEXT:    s_addc_u32 s57, s57, 0
+; GCN-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
+; GCN-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
+; GCN-NEXT:    s_mov_b32 s54, -1
+; GCN-NEXT:    s_mov_b32 s55, 0xe8f000
+; GCN-NEXT:    s_add_u32 s52, s52, s3
+; GCN-NEXT:    s_addc_u32 s53, s53, 0
 ; GCN-NEXT:    s_load_dword s0, s[0:1], 0xb
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ;;#ASMEND
@@ -689,180 +685,176 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v31, s4, 0
+; GCN-NEXT:    v_writelane_b32 v31, s5, 1
+; GCN-NEXT:    v_writelane_b32 v31, s6, 2
+; GCN-NEXT:    v_writelane_b32 v31, s7, 3
+; GCN-NEXT:    v_writelane_b32 v31, s8, 4
+; GCN-NEXT:    v_writelane_b32 v31, s9, 5
+; GCN-NEXT:    v_writelane_b32 v31, s10, 6
+; GCN-NEXT:    v_writelane_b32 v31, s11, 7
+; GCN-NEXT:    v_writelane_b32 v31, s12, 8
+; GCN-NEXT:    v_writelane_b32 v31, s13, 9
+; GCN-NEXT:    v_writelane_b32 v31, s14, 10
+; GCN-NEXT:    v_writelane_b32 v31, s15, 11
+; GCN-NEXT:    v_writelane_b32 v31, s16, 12
+; GCN-NEXT:    v_writelane_b32 v31, s17, 13
+; GCN-NEXT:    v_writelane_b32 v31, s18, 14
+; GCN-NEXT:    v_writelane_b32 v31, s19, 15
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[36:51]
+; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_writelane_b32 v31, s0, 0
-; GCN-NEXT:    v_writelane_b32 v31, s4, 1
-; GCN-NEXT:    v_writelane_b32 v31, s5, 2
-; GCN-NEXT:    v_writelane_b32 v31, s6, 3
-; GCN-NEXT:    v_writelane_b32 v31, s7, 4
-; GCN-NEXT:    v_writelane_b32 v31, s8, 5
-; GCN-NEXT:    v_writelane_b32 v31, s9, 6
-; GCN-NEXT:    v_writelane_b32 v31, s10, 7
-; GCN-NEXT:    v_writelane_b32 v31, s11, 8
-; GCN-NEXT:    v_writelane_b32 v31, s12, 9
-; GCN-NEXT:    v_writelane_b32 v31, s13, 10
-; GCN-NEXT:    v_writelane_b32 v31, s14, 11
-; GCN-NEXT:    v_writelane_b32 v31, s15, 12
-; GCN-NEXT:    v_writelane_b32 v31, s16, 13
-; GCN-NEXT:    v_writelane_b32 v31, s17, 14
-; GCN-NEXT:    v_writelane_b32 v31, s18, 15
-; GCN-NEXT:    v_writelane_b32 v31, s19, 16
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[0:15]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[16:31]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; def s[34:35]
-; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    s_mov_b32 s33, 0
-; GCN-NEXT:    v_readlane_b32 s52, v31, 0
-; GCN-NEXT:    s_cmp_lg_u32 s52, s33
-; GCN-NEXT:    v_writelane_b32 v31, s36, 17
-; GCN-NEXT:    v_writelane_b32 v31, s37, 18
-; GCN-NEXT:    v_writelane_b32 v31, s38, 19
-; GCN-NEXT:    v_writelane_b32 v31, s39, 20
-; GCN-NEXT:    v_writelane_b32 v31, s40, 21
-; GCN-NEXT:    v_writelane_b32 v31, s41, 22
-; GCN-NEXT:    v_writelane_b32 v31, s42, 23
-; GCN-NEXT:    v_writelane_b32 v31, s43, 24
-; GCN-NEXT:    v_writelane_b32 v31, s44, 25
-; GCN-NEXT:    v_writelane_b32 v31, s45, 26
-; GCN-NEXT:    v_writelane_b32 v31, s46, 27
-; GCN-NEXT:    v_writelane_b32 v31, s47, 28
-; GCN-NEXT:    v_writelane_b32 v31, s48, 29
-; GCN-NEXT:    v_writelane_b32 v31, s49, 30
-; GCN-NEXT:    v_writelane_b32 v31, s50, 31
-; GCN-NEXT:    v_writelane_b32 v31, s51, 32
-; GCN-NEXT:    v_writelane_b32 v31, s0, 33
-; GCN-NEXT:    v_writelane_b32 v31, s1, 34
-; GCN-NEXT:    v_writelane_b32 v31, s2, 35
-; GCN-NEXT:    v_writelane_b32 v31, s3, 36
-; GCN-NEXT:    v_writelane_b32 v31, s4, 37
-; GCN-NEXT:    v_writelane_b32 v31, s5, 38
-; GCN-NEXT:    v_writelane_b32 v31, s6, 39
-; GCN-NEXT:    v_writelane_b32 v31, s7, 40
-; GCN-NEXT:    v_writelane_b32 v31, s8, 41
-; GCN-NEXT:    v_writelane_b32 v31, s9, 42
-; GCN-NEXT:    v_writelane_b32 v31, s10, 43
-; GCN-NEXT:    v_writelane_b32 v31, s11, 44
-; GCN-NEXT:    v_writelane_b32 v31, s12, 45
-; GCN-NEXT:    v_writelane_b32 v31, s13, 46
-; GCN-NEXT:    v_writelane_b32 v31, s14, 47
-; GCN-NEXT:    v_writelane_b32 v31, s15, 48
-; GCN-NEXT:    buffer_store_dword v0, off, s[56:59], 0
-; GCN-NEXT:    v_writelane_b32 v0, s16, 0
-; GCN-NEXT:    v_writelane_b32 v0, s17, 1
-; GCN-NEXT:    v_writelane_b32 v0, s18, 2
-; GCN-NEXT:    v_writelane_b32 v0, s19, 3
-; GCN-NEXT:    v_writelane_b32 v0, s20, 4
-; GCN-NEXT:    v_writelane_b32 v0, s21, 5
-; GCN-NEXT:    v_writelane_b32 v0, s22, 6
-; GCN-NEXT:    v_writelane_b32 v0, s23, 7
-; GCN-NEXT:    v_writelane_b32 v0, s24, 8
-; GCN-NEXT:    v_writelane_b32 v0, s25, 9
-; GCN-NEXT:    v_writelane_b32 v0, s26, 10
-; GCN-NEXT:    v_writelane_b32 v0, s27, 11
-; GCN-NEXT:    v_writelane_b32 v0, s28, 12
-; GCN-NEXT:    v_writelane_b32 v0, s29, 13
-; GCN-NEXT:    v_writelane_b32 v0, s30, 14
-; GCN-NEXT:    v_writelane_b32 v0, s31, 15
-; GCN-NEXT:    s_mov_b64 s[16:17], exec
-; GCN-NEXT:    s_mov_b64 exec, 0xffff
-; GCN-NEXT:    buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[16:17]
-; GCN-NEXT:    v_writelane_b32 v31, s34, 49
-; GCN-NEXT:    v_writelane_b32 v31, s35, 50
-; GCN-NEXT:    buffer_load_dword v0, off, s[56:59], 0
-; GCN-NEXT:    s_cbranch_scc1 BB2_2
-; GCN-NEXT:  ; %bb.1: ; %bb0
-; GCN-NEXT:    v_readlane_b32 s0, v31, 1
-; GCN-NEXT:    v_readlane_b32 s1, v31, 2
-; GCN-NEXT:    v_readlane_b32 s2, v31, 3
-; GCN-NEXT:    v_readlane_b32 s3, v31, 4
-; GCN-NEXT:    v_readlane_b32 s4, v31, 5
-; GCN-NEXT:    v_readlane_b32 s5, v31, 6
-; GCN-NEXT:    v_readlane_b32 s6, v31, 7
-; GCN-NEXT:    v_readlane_b32 s7, v31, 8
-; GCN-NEXT:    v_readlane_b32 s8, v31, 9
-; GCN-NEXT:    v_readlane_b32 s9, v31, 10
-; GCN-NEXT:    v_readlane_b32 s10, v31, 11
-; GCN-NEXT:    v_readlane_b32 s11, v31, 12
-; GCN-NEXT:    v_readlane_b32 s12, v31, 13
-; GCN-NEXT:    v_readlane_b32 s13, v31, 14
-; GCN-NEXT:    v_readlane_b32 s14, v31, 15
-; GCN-NEXT:    v_readlane_b32 s15, v31, 16
+; GCN-NEXT:    v_writelane_b32 v31, s4, 16
+; GCN-NEXT:    v_writelane_b32 v31, s5, 17
+; GCN-NEXT:    v_writelane_b32 v31, s6, 18
+; GCN-NEXT:    v_writelane_b32 v31, s7, 19
+; GCN-NEXT:    v_writelane_b32 v31, s8, 20
+; GCN-NEXT:    v_writelane_b32 v31, s9, 21
+; GCN-NEXT:    v_writelane_b32 v31, s10, 22
+; GCN-NEXT:    v_writelane_b32 v31, s11, 23
+; GCN-NEXT:    v_writelane_b32 v31, s12, 24
+; GCN-NEXT:    v_writelane_b32 v31, s13, 25
+; GCN-NEXT:    v_writelane_b32 v31, s14, 26
+; GCN-NEXT:    v_writelane_b32 v31, s15, 27
+; GCN-NEXT:    v_writelane_b32 v31, s16, 28
+; GCN-NEXT:    v_writelane_b32 v31, s17, 29
+; GCN-NEXT:    v_writelane_b32 v31, s18, 30
+; GCN-NEXT:    v_writelane_b32 v31, s19, 31
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:15]
+; GCN-NEXT:    ; def s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v31, 17
-; GCN-NEXT:    v_readlane_b32 s1, v31, 18
-; GCN-NEXT:    v_readlane_b32 s2, v31, 19
-; GCN-NEXT:    v_readlane_b32 s3, v31, 20
-; GCN-NEXT:    v_readlane_b32 s4, v31, 21
-; GCN-NEXT:    v_readlane_b32 s5, v31, 22
-; GCN-NEXT:    v_readlane_b32 s6, v31, 23
-; GCN-NEXT:    v_readlane_b32 s7, v31, 24
-; GCN-NEXT:    v_readlane_b32 s8, v31, 25
-; GCN-NEXT:    v_readlane_b32 s9, v31, 26
-; GCN-NEXT:    v_readlane_b32 s10, v31, 27
-; GCN-NEXT:    v_readlane_b32 s11, v31, 28
-; GCN-NEXT:    v_readlane_b32 s12, v31, 29
-; GCN-NEXT:    v_readlane_b32 s13, v31, 30
-; GCN-NEXT:    v_readlane_b32 s14, v31, 31
-; GCN-NEXT:    v_readlane_b32 s15, v31, 32
+; GCN-NEXT:    v_writelane_b32 v31, s4, 32
+; GCN-NEXT:    v_writelane_b32 v31, s5, 33
+; GCN-NEXT:    v_writelane_b32 v31, s6, 34
+; GCN-NEXT:    v_writelane_b32 v31, s7, 35
+; GCN-NEXT:    v_writelane_b32 v31, s8, 36
+; GCN-NEXT:    v_writelane_b32 v31, s9, 37
+; GCN-NEXT:    v_writelane_b32 v31, s10, 38
+; GCN-NEXT:    v_writelane_b32 v31, s11, 39
+; GCN-NEXT:    v_writelane_b32 v31, s12, 40
+; GCN-NEXT:    v_writelane_b32 v31, s13, 41
+; GCN-NEXT:    v_writelane_b32 v31, s14, 42
+; GCN-NEXT:    v_writelane_b32 v31, s15, 43
+; GCN-NEXT:    v_writelane_b32 v31, s16, 44
+; GCN-NEXT:    v_writelane_b32 v31, s17, 45
+; GCN-NEXT:    v_writelane_b32 v31, s18, 46
+; GCN-NEXT:    v_writelane_b32 v31, s19, 47
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:15]
+; GCN-NEXT:    ; def s[4:19]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v31, s4, 48
+; GCN-NEXT:    v_writelane_b32 v31, s5, 49
+; GCN-NEXT:    v_writelane_b32 v31, s6, 50
+; GCN-NEXT:    v_writelane_b32 v31, s7, 51
+; GCN-NEXT:    v_writelane_b32 v31, s8, 52
+; GCN-NEXT:    v_writelane_b32 v31, s9, 53
+; GCN-NEXT:    v_writelane_b32 v31, s10, 54
+; GCN-NEXT:    v_writelane_b32 v31, s11, 55
+; GCN-NEXT:    v_writelane_b32 v31, s12, 56
+; GCN-NEXT:    v_writelane_b32 v31, s13, 57
+; GCN-NEXT:    v_writelane_b32 v31, s14, 58
+; GCN-NEXT:    v_writelane_b32 v31, s15, 59
+; GCN-NEXT:    v_writelane_b32 v31, s16, 60
+; GCN-NEXT:    v_writelane_b32 v31, s17, 61
+; GCN-NEXT:    v_writelane_b32 v31, s18, 62
+; GCN-NEXT:    v_writelane_b32 v31, s19, 63
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def s[2:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_writelane_b32 v0, s2, 0
+; GCN-NEXT:    v_writelane_b32 v0, s3, 1
+; GCN-NEXT:    s_mov_b64 s[2:3], exec
+; GCN-NEXT:    s_mov_b64 exec, 3
+; GCN-NEXT:    buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT:    s_mov_b64 exec, s[2:3]
+; GCN-NEXT:    s_mov_b32 s1, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_lg_u32 s0, s1
+; GCN-NEXT:    s_cbranch_scc1 BB2_2
+; GCN-NEXT:  ; %bb.1: ; %bb0
+; GCN-NEXT:    v_readlane_b32 s36, v31, 32
+; GCN-NEXT:    v_readlane_b32 s37, v31, 33
+; GCN-NEXT:    v_readlane_b32 s38, v31, 34
+; GCN-NEXT:    v_readlane_b32 s39, v31, 35
+; GCN-NEXT:    v_readlane_b32 s40, v31, 36
+; GCN-NEXT:    v_readlane_b32 s41, v31, 37
+; GCN-NEXT:    v_readlane_b32 s42, v31, 38
+; GCN-NEXT:    v_readlane_b32 s43, v31, 39
+; GCN-NEXT:    v_readlane_b32 s44, v31, 40
+; GCN-NEXT:    v_readlane_b32 s45, v31, 41
+; GCN-NEXT:    v_readlane_b32 s46, v31, 42
+; GCN-NEXT:    v_readlane_b32 s47, v31, 43
+; GCN-NEXT:    v_readlane_b32 s48, v31, 44
+; GCN-NEXT:    v_readlane_b32 s49, v31, 45
+; GCN-NEXT:    v_readlane_b32 s50, v31, 46
+; GCN-NEXT:    v_readlane_b32 s51, v31, 47
+; GCN-NEXT:    v_readlane_b32 s0, v31, 16
+; GCN-NEXT:    v_readlane_b32 s1, v31, 17
+; GCN-NEXT:    v_readlane_b32 s2, v31, 18
+; GCN-NEXT:    v_readlane_b32 s3, v31, 19
+; GCN-NEXT:    v_readlane_b32 s4, v31, 20
+; GCN-NEXT:    v_readlane_b32 s5, v31, 21
+; GCN-NEXT:    v_readlane_b32 s6, v31, 22
+; GCN-NEXT:    v_readlane_b32 s7, v31, 23
+; GCN-NEXT:    v_readlane_b32 s8, v31, 24
+; GCN-NEXT:    v_readlane_b32 s9, v31, 25
+; GCN-NEXT:    v_readlane_b32 s10, v31, 26
+; GCN-NEXT:    v_readlane_b32 s11, v31, 27
+; GCN-NEXT:    v_readlane_b32 s12, v31, 28
+; GCN-NEXT:    v_readlane_b32 s13, v31, 29
+; GCN-NEXT:    v_readlane_b32 s14, v31, 30
+; GCN-NEXT:    v_readlane_b32 s15, v31, 31
+; GCN-NEXT:    v_readlane_b32 s16, v31, 0
+; GCN-NEXT:    v_readlane_b32 s17, v31, 1
+; GCN-NEXT:    v_readlane_b32 s18, v31, 2
+; GCN-NEXT:    v_readlane_b32 s19, v31, 3
+; GCN-NEXT:    v_readlane_b32 s20, v31, 4
+; GCN-NEXT:    v_readlane_b32 s21, v31, 5
+; GCN-NEXT:    v_readlane_b32 s22, v31, 6
+; GCN-NEXT:    v_readlane_b32 s23, v31, 7
+; GCN-NEXT:    v_readlane_b32 s24, v31, 8
+; GCN-NEXT:    v_readlane_b32 s25, v31, 9
+; GCN-NEXT:    v_readlane_b32 s26, v31, 10
+; GCN-NEXT:    v_readlane_b32 s27, v31, 11
+; GCN-NEXT:    v_readlane_b32 s28, v31, 12
+; GCN-NEXT:    v_readlane_b32 s29, v31, 13
+; GCN-NEXT:    v_readlane_b32 s30, v31, 14
+; GCN-NEXT:    v_readlane_b32 s31, v31, 15
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use s[16:31]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v31, 33
-; GCN-NEXT:    v_readlane_b32 s1, v31, 34
-; GCN-NEXT:    v_readlane_b32 s2, v31, 35
-; GCN-NEXT:    v_readlane_b32 s3, v31, 36
-; GCN-NEXT:    v_readlane_b32 s4, v31, 37
-; GCN-NEXT:    v_readlane_b32 s5, v31, 38
-; GCN-NEXT:    v_readlane_b32 s6, v31, 39
-; GCN-NEXT:    v_readlane_b32 s7, v31, 40
-; GCN-NEXT:    v_readlane_b32 s8, v31, 41
-; GCN-NEXT:    v_readlane_b32 s9, v31, 42
-; GCN-NEXT:    v_readlane_b32 s10, v31, 43
-; GCN-NEXT:    v_readlane_b32 s11, v31, 44
-; GCN-NEXT:    v_readlane_b32 s12, v31, 45
-; GCN-NEXT:    v_readlane_b32 s13, v31, 46
-; GCN-NEXT:    v_readlane_b32 s14, v31, 47
-; GCN-NEXT:    v_readlane_b32 s15, v31, 48
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:15]
 ; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    v_readlane_b32 s4, v31, 48
+; GCN-NEXT:    v_readlane_b32 s5, v31, 49
+; GCN-NEXT:    v_readlane_b32 s6, v31, 50
+; GCN-NEXT:    v_readlane_b32 s7, v31, 51
+; GCN-NEXT:    v_readlane_b32 s8, v31, 52
+; GCN-NEXT:    v_readlane_b32 s9, v31, 53
+; GCN-NEXT:    v_readlane_b32 s10, v31, 54
+; GCN-NEXT:    v_readlane_b32 s11, v31, 55
+; GCN-NEXT:    v_readlane_b32 s12, v31, 56
+; GCN-NEXT:    v_readlane_b32 s13, v31, 57
+; GCN-NEXT:    v_readlane_b32 s14, v31, 58
+; GCN-NEXT:    v_readlane_b32 s15, v31, 59
+; GCN-NEXT:    v_readlane_b32 s16, v31, 60
+; GCN-NEXT:    v_readlane_b32 s17, v31, 61
+; GCN-NEXT:    v_readlane_b32 s18, v31, 62
+; GCN-NEXT:    v_readlane_b32 s19, v31, 63
 ; GCN-NEXT:    s_mov_b64 s[0:1], exec
-; GCN-NEXT:    s_mov_b64 exec, 0xffff
-; GCN-NEXT:    buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT:    s_mov_b64 exec, 3
+; GCN-NEXT:    buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
 ; GCN-NEXT:    s_mov_b64 exec, s[0:1]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readlane_b32 s0, v0, 0
 ; GCN-NEXT:    v_readlane_b32 s1, v0, 1
-; GCN-NEXT:    v_readlane_b32 s2, v0, 2
-; GCN-NEXT:    v_readlane_b32 s3, v0, 3
-; GCN-NEXT:    v_readlane_b32 s4, v0, 4
-; GCN-NEXT:    v_readlane_b32 s5, v0, 5
-; GCN-NEXT:    v_readlane_b32 s6, v0, 6
-; GCN-NEXT:    v_readlane_b32 s7, v0, 7
-; GCN-NEXT:    v_readlane_b32 s8, v0, 8
-; GCN-NEXT:    v_readlane_b32 s9, v0, 9
-; GCN-NEXT:    v_readlane_b32 s10, v0, 10
-; GCN-NEXT:    v_readlane_b32 s11, v0, 11
-; GCN-NEXT:    v_readlane_b32 s12, v0, 12
-; GCN-NEXT:    v_readlane_b32 s13, v0, 13
-; GCN-NEXT:    v_readlane_b32 s14, v0, 14
-; GCN-NEXT:    v_readlane_b32 s15, v0, 15
 ; GCN-NEXT:    ;;#ASMSTART
-; GCN-NEXT:    ; use s[0:15]
+; GCN-NEXT:    ; use s[36:51]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use s[4:19]
 ; GCN-NEXT:    ;;#ASMEND
-; GCN-NEXT:    v_readlane_b32 s0, v31, 49
-; GCN-NEXT:    v_readlane_b32 s1, v31, 50
 ; GCN-NEXT:    ;;#ASMSTART
 ; GCN-NEXT:    ; use s[0:1]
 ; GCN-NEXT:    ;;#ASMEND

diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
index 73d837efa9f4..56a675e3ddad 100644
--- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
@@ -11,8 +11,8 @@ define void @child_function() #0 {
 ; GCN:  v_writelane_b32 v255, s30, 0
 ; GCN:  v_writelane_b32 v255, s31, 1
 ; GCN:  s_swappc_b64 s[30:31], s[4:5]
-; GCN:  v_readlane_b32 s4, v255, 0
-; GCN:  v_readlane_b32 s5, v255, 1
+; GCN:  v_readlane_b32 s30, v255, 0
+; GCN:  v_readlane_b32 s31, v255, 1
 ; GCN:  v_readlane_b32 s33, v255, 2
 ; GCN: ; NumVgprs: 256
 
@@ -57,8 +57,8 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
 ; GCN:  v_writelane_b32 v254, s30, 0
 ; GCN:  v_writelane_b32 v254, s31, 1
 ; GCN:  s_swappc_b64 s[30:31], s[4:5]
-; GCN:  v_readlane_b32 s4, v254, 0
-; GCN:  v_readlane_b32 s5, v254, 1
+; GCN:  v_readlane_b32 s30, v254, 0
+; GCN:  v_readlane_b32 s31, v254, 1
 ; GCN:  v_readlane_b32 s33, v254, 2
 
 define void @reserve_lowest_available_vgpr() #0 {

diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index 2138af8099f9..c817b977eb9d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -13,25 +13,25 @@ body: |
   ; SPILLED: bb.0:
   ; SPILLED:   successors: %bb.1(0x80000000)
   ; SPILLED:   S_NOP 0, implicit-def renamable $agpr0
-  ; SPILLED:   S_NOP 0, implicit-def renamable $agpr1
+  ; SPILLED:   SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit-def renamable $agpr0
   ; SPILLED:   SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
-  ; SPILLED:   SI_SPILL_A32_SAVE killed $agpr1, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5)
   ; SPILLED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SPILLED: bb.1:
   ; SPILLED:   successors: %bb.2(0x80000000)
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
-  ; SPILLED:   $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-  ; SPILLED:   $agpr1 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1
+  ; SPILLED:   $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5)
+  ; SPILLED:   $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1
   ; EXPANDED-LABEL: name: spill_restore_agpr32
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
   ; EXPANDED:   liveins: $vgpr0, $vgpr1
   ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0
-  ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr1
   ; EXPANDED:   $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
-  ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec
+  ; EXPANDED:   S_NOP 0, implicit-def renamable $agpr0
+  ; EXPANDED:   $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec
   ; EXPANDED:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED: bb.1:
   ; EXPANDED:   successors: %bb.2(0x80000000)
@@ -41,7 +41,7 @@ body: |
   ; EXPANDED:   liveins: $vgpr0, $vgpr1
   ; EXPANDED:   $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec
   ; EXPANDED:   $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1
   bb.0:
     S_NOP 0, implicit-def %0:agpr_32
     S_NOP 0, implicit-def %1:agpr_32
@@ -72,7 +72,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1
   ; EXPANDED-LABEL: name: spill_restore_agpr64
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -89,7 +89,7 @@ body: |
   ; EXPANDED:   liveins: $vgpr0, $vgpr1
   ; EXPANDED:   $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
   ; EXPANDED:   $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1
   bb.0:
     S_NOP 0, implicit-def %0:areg_64
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -118,6 +118,7 @@ body: |
   ; SPILLED: bb.1:
   ; SPILLED:   successors: %bb.2(0x80000000)
   ; SPILLED: bb.2:
+  ; SPILLED:   $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
   ; SPILLED:   S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; SPILLED:   S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
   ; SPILLED:   S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
@@ -134,8 +135,7 @@ body: |
   ; SPILLED:   S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; SPILLED:   S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247
   ; SPILLED:   S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255
-  ; SPILLED:   $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0
   ; EXPANDED-LABEL: name: spill_restore_agpr32_used_all_vgprs
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -149,6 +149,8 @@ body: |
   ; EXPANDED: bb.1:
   ; EXPANDED:   successors: %bb.2(0x80000000)
   ; EXPANDED: bb.2:
+  ; EXPANDED:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+  ; EXPANDED:   $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
   ; EXPANDED:   S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; EXPANDED:   S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
   ; EXPANDED:   S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
@@ -165,9 +167,7 @@ body: |
   ; EXPANDED:   S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; EXPANDED:   S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247
   ; EXPANDED:   S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255
-  ; EXPANDED:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
-  ; EXPANDED:   $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0
   bb.0:
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255
 
@@ -214,7 +214,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2
   ; EXPANDED-LABEL: name: spill_restore_agpr96
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -233,7 +233,7 @@ body: |
   ; EXPANDED:   $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
   ; EXPANDED:   $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2
   ; EXPANDED:   $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2
   bb.0:
     S_NOP 0, implicit-def %0:areg_96
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -263,7 +263,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3
   ; EXPANDED-LABEL: name: spill_restore_agpr128
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -284,7 +284,7 @@ body: |
   ; EXPANDED:   $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
   ; EXPANDED:   $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
   ; EXPANDED:   $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3
   bb.0:
     S_NOP 0, implicit-def %0:areg_128
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -314,7 +314,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4
   ; EXPANDED-LABEL: name: spill_restore_agpr160
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -337,7 +337,7 @@ body: |
   ; EXPANDED:   $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
   ; EXPANDED:   $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
   ; EXPANDED:   $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4
   bb.0:
     S_NOP 0, implicit-def %0:areg_160
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -367,7 +367,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   ; EXPANDED-LABEL: name: spill_restore_agpr192
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -392,7 +392,7 @@ body: |
   ; EXPANDED:   $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   ; EXPANDED:   $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   ; EXPANDED:   $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
   bb.0:
     S_NOP 0, implicit-def %0:areg_192
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -422,7 +422,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   ; EXPANDED-LABEL: name: spill_restore_agpr256
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -451,7 +451,7 @@ body: |
   ; EXPANDED:   $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   ; EXPANDED:   $agpr6 = V_ACCVGPR_WRITE_B32 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   ; EXPANDED:   $agpr7 = V_ACCVGPR_WRITE_B32 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
   bb.0:
     S_NOP 0, implicit-def %0:areg_256
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -481,7 +481,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   ; EXPANDED-LABEL: name: spill_restore_agpr512
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -526,7 +526,7 @@ body: |
   ; EXPANDED:   $agpr13 = V_ACCVGPR_WRITE_B32 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   ; EXPANDED:   $agpr14 = V_ACCVGPR_WRITE_B32 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   ; EXPANDED:   $agpr15 = V_ACCVGPR_WRITE_B32 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
   bb.0:
     S_NOP 0, implicit-def %0:areg_512
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -556,7 +556,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+  ; SPILLED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   ; EXPANDED-LABEL: name: spill_restore_agpr1024
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -633,7 +633,7 @@ body: |
   ; EXPANDED:   $agpr29 = V_ACCVGPR_WRITE_B32 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   ; EXPANDED:   $agpr30 = V_ACCVGPR_WRITE_B32 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   ; EXPANDED:   $agpr31 = V_ACCVGPR_WRITE_B32 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-  ; EXPANDED:   S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
   bb.0:
     S_NOP 0, implicit-def %0:areg_1024
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1

diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index 9b629a5f9111..474461d2ae12 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -1,28 +1,32 @@
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga  -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga  -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s
 
 ; XXX - Why does it like to use vcc?
 
 ; GCN-LABEL: {{^}}spill_m0:
 
-; GCN-DAG: s_cmp_lg_u32
+; GCN: #ASMSTART
+; GCN-NEXT: s_mov_b32 m0, 0
+; GCN-NEXT: #ASMEND
+; GCN-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
 
-; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
-; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2
+; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]]
 
-; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
-; TOVMEM-DAG: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
-; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Spill
+; TOVMEM: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
+; TOVMEM: s_mov_b32 [[COPY_EXEC_LO:s[0-9]+]], exec_lo
+; TOVMEM: s_mov_b32 exec_lo, 1
+; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Spill
+; TOVMEM: s_mov_b32 exec_lo, [[COPY_EXEC_LO]]
 
 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[ENDIF]]:
-; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2
+; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], [[M0_LANE]]
 ; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
 
-; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Reload
+; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Reload
 ; TOVMEM: s_waitcnt vmcnt(0)
 ; TOVMEM: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]], 0
 ; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
@@ -48,8 +52,6 @@ endif:
 
 ; m0 is killed, so it isn't necessary to preserve it during the entry block spill
 ; GCN-LABEL: {{^}}spill_kill_m0_lds:
-; GCN: s_mov_b32 m0, s6
-; GCN: v_interp_mov_f32
 
 ; GCN-NOT: v_readlane_b32 m0
 ; GCN-NOT: s_buffer_store_dword m0
@@ -79,10 +81,11 @@ endif:                                            ; preds = %else, %if
 
 ; Force save and restore of m0 during SMEM spill
 ; GCN-LABEL: {{^}}m0_unavailable_spill:
+; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}}
 
 ; GCN: ; def m0, 1
 
-; GCN: s_mov_b32 m0, s0
+; GCN: s_mov_b32 m0, [[REG0]]
 ; GCN: v_interp_mov_f32
 
 ; GCN: ; clobber m0
@@ -124,16 +127,17 @@ endif:
 }
 
 ; GCN-LABEL: {{^}}restore_m0_lds:
-; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
-; TOSMEM: s_cmp_eq_u32
 ; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it
 ; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x100
-; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill
+; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}}
+; TOSMEM: s_buffer_store_dword s1, s[88:91], m0 ; 4-byte Folded Spill
 ; FIXME-TOSMEM-NOT: m0
-; TOSMEM: s_add_u32 m0, s3, 0x200
+; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]]
+; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}}
+; TOSMEM: s_waitcnt lgkmcnt(0)
 ; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill
 ; FIXME-TOSMEM-NOT: m0
+; TOSMEM: s_cmp_eq_u32
 ; TOSMEM: s_cbranch_scc1
 
 ; TOSMEM: s_mov_b32 m0, -1
@@ -150,6 +154,13 @@ endif:
 ; TOSMEM: s_add_u32 m0, s3, 0x100
 ; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload
 ; FIXME-TOSMEM-NOT: m0
+
+; TOSMEM: s_mov_b32 [[REG1:s[0-9]+]], m0
+; TOSMEM: s_add_u32 m0, s3, 0x100
+; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload
+; TOSMEM: s_mov_b32 m0, [[REG1]]
+; TOSMEM: s_mov_b32 m0, -1
+
 ; TOSMEM: s_waitcnt lgkmcnt(0)
 ; TOSMEM-NOT: m0
 ; TOSMEM: s_mov_b32 m0, s2
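
The updated TOVMEM checks in this file write m0's copy into lane 0 of a VGPR and then narrow the exec mask around the buffer store. A minimal C sketch of why that makes the store a "4-byte Folded Spill" (a toy per-lane model with a single 32-bit mask word; the name and lane count are simplifications, not the real ISA semantics):

    #include <stdint.h>

    /* Toy model: a buffer store writes one dword per *active* lane.
     * With exec temporarily set to 1, only lane 0 is active, so the
     * spill touches just the 4 bytes holding the writelane'd m0 copy. */
    static void buffer_store_dword(const uint32_t vgpr[32], uint32_t exec,
                                   uint32_t *slot) {
        for (int lane = 0; lane < 32; ++lane)
            if ((exec >> lane) & 1u)
                slot[lane] = vgpr[lane];
    }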

diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index 25a2d6ebb006..9344b0cb8c8a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -24,7 +24,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; SPILLED:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
   ; EXPANDED-LABEL: name: spill_restore_sgpr192
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -49,7 +49,7 @@ body: |
   ; EXPANDED:   $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3
   ; EXPANDED:   $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4
   ; EXPANDED:   $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5
-  ; EXPANDED:   S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
   bb.0:
     S_NOP 0, implicit-def %0:sgpr_192
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1
@@ -79,7 +79,7 @@ body: |
   ; SPILLED:   S_NOP 1
   ; SPILLED: bb.2:
   ; SPILLED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
-  ; SPILLED:   S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; SPILLED:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
   ; EXPANDED-LABEL: name: spill_restore_vgpr192
   ; EXPANDED: bb.0:
   ; EXPANDED:   successors: %bb.1(0x80000000)
@@ -91,7 +91,7 @@ body: |
   ; EXPANDED:   S_NOP 1
   ; EXPANDED: bb.2:
   ; EXPANDED:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
-  ; EXPANDED:   S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+  ; EXPANDED:   S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
   bb.0:
     S_NOP 0, implicit-def %0:vreg_192
     S_CBRANCH_SCC1 implicit undef $scc, %bb.1

diff --git a/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
new file mode 100644
index 000000000000..9f5b4793ecfb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir
@@ -0,0 +1,32 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s
+
+---
+name:            bar
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: bar
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
+    ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
+    ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
+    ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc
+    ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
+    ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec
+    ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc
+    %0:vgpr_32 = COPY $vgpr0
+    V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec
+    %3:sreg_64_xexec = COPY $vcc
+    %1:sreg_64_xexec = COPY $vcc
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec
+    $vgpr0 = COPY %2
+    S_ENDPGM 0, implicit $vgpr0, implicit %3
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index 1a48e76a241b..0193313ff213 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -69,8 +69,8 @@ if:
 merge:
   %merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ]
 ; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
-; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]]
 ; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]]
+; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]]
 ; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
   %tmp138 = icmp eq i32 %tmp122, %merge_value
   %tmp139 = sext i1 %tmp138 to i32
@@ -82,7 +82,7 @@ merge:
 }
 
 ; GFX9-LABEL: {{^}}called:
-define i32 @called(i32 %a) noinline {
+define hidden i32 @called(i32 %a) noinline {
 ; GFX9: v_add_u32_e32 v1, v0, v0
   %add = add i32 %a, %a
 ; GFX9: v_mul_lo_u32 v0, v1, v0
@@ -94,10 +94,15 @@ define i32 @called(i32 %a) noinline {
 
 ; GFX9-LABEL: {{^}}call:
 define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
-; GFX9-O0: v_mov_b32_e32 v0, s0
-; GFX9-O3: v_mov_b32_e32 v2, s0
+; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]]
+; GFX9-O0-DAG: s_mov_b32 s0, 0{{$}}
+; GFX9-O0-DAG: v_mov_b32_e32 v0, [[ARG]]
+
+; GFX9-O3: v_mov_b32_e32 v2, [[ARG]]
+
+
 ; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: s_not_b64 exec, exec
   %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
@@ -107,12 +112,11 @@ define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
   %tmp134 = call i32 @called(i32 %tmp107)
 ; GFX9-O0: buffer_load_dword v1
 ; GFX9-O3: v_mov_b32_e32 v1, v0
-; GFX9-O0: v_add_u32_e32 v0, v0, v1
+; GFX9-O0: v_add_u32_e32 v1, v0, v1
 ; GFX9-O3: v_add_u32_e32 v1, v1, v2
   %tmp136 = add i32 %tmp134, %tmp107
   %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
-; GFX9-O0: buffer_store_dword v2
-; GFX9-O3: buffer_store_dword v0
+; GFX9: buffer_store_dword v0
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 %tmp137, <4 x i32> %tmp14, i32 4, i32 0, i32 0)
   ret void
 }
@@ -127,19 +131,24 @@ define i64 @called_i64(i64 %a) noinline {
 
 ; GFX9-LABEL: {{^}}call_i64:
 define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) {
-; GFX9-O0: v_mov_b32_e32 v0, s0
-; GFX9-O0: v_mov_b32_e32 v1, s1
-; GFX9-O3: v_mov_b32_e32 v7, s1
-; GFX9-O3: v_mov_b32_e32 v6, s0
-; GFX9-NEXT: s_not_b64 exec, exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s3
+; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}}
+
+; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}}
+; GFX9-O0: v_mov_b32_e32 v1, s[[ARG_LO]]
+; GFX9-O0: v_mov_b32_e32 v2, s[[ARG_HI]]
+
+; GFX9-O3-DAG: v_mov_b32_e32 v7, s[[ARG_HI]]
+; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]]
+
+; GFX9: s_not_b64 exec, exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s[[ZERO_LO]]
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s[[ZERO_HI]]
 ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0
 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0
 ; GFX9-NEXT: s_not_b64 exec, exec
   %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0)
-; GFX9-O0: buffer_store_dword v0
 ; GFX9-O0: buffer_store_dword v1
+; GFX9-O0: buffer_store_dword v2
 ; GFX9: s_swappc_b64
   %tmp134 = call i64 @called_i64(i64 %tmp107)
 ; GFX9-O0: buffer_load_dword v4
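
The checks above follow the llvm.amdgcn.set.inactive lowering: the argument is written while exec holds the normal mask, exec is inverted with s_not_b64, the inactive-lane value is written, and exec is inverted back. A C sketch of the per-lane result (a toy model; the function name and fixed 64-lane layout are illustrative):

    #include <stdint.h>

    /* Lanes active in exec keep the source value; inactive lanes
     * observe the requested default (0 in the tests above). */
    static void set_inactive_i32(uint32_t dst[64], const uint32_t src[64],
                                 uint64_t exec, uint32_t inactive_val) {
        for (int lane = 0; lane < 64; ++lane)
            dst[lane] = ((exec >> lane) & 1u) ? src[lane] : inactive_val;
    }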

diff --git a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index b15145d85f17..662a78c4dfa6 100644
--- a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -8,6 +8,9 @@
 define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) #0 !dbg !4 {
 entry:
   %"alloca point" = bitcast i32 0 to i32
+  br label %realentry
+
+realentry:
   call void @llvm.dbg.value(metadata i32 %i, metadata !21, metadata !DIExpression()), !dbg !22
   call void @llvm.dbg.value(metadata %struct.SVal* %location, metadata !23, metadata !DIExpression()), !dbg !22
   %tmp = icmp ne i32 %i, 0, !dbg !25

diff --git a/llvm/test/CodeGen/ARM/Windows/alloca.ll b/llvm/test/CodeGen/ARM/Windows/alloca.ll
index 7db854df7296..ec3b130b3d8b 100644
--- a/llvm/test/CodeGen/ARM/Windows/alloca.ll
+++ b/llvm/test/CodeGen/ARM/Windows/alloca.ll
@@ -17,10 +17,11 @@ entry:
 ; CHECK: bl num_entries
 ; Any register is actually valid here, but it turns out we use lr,
 ; because we do not have the kill flag on R0.
-; CHECK: movs [[R1:r1]], #7
-; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
-; CHECK: bic [[R0]], [[R0]], #4
-; CHECK: lsrs r4, [[R0]], #2
+; CHECK: mov [[R0:r[0-9]+]], r0
+; CHECK: movs [[R1:r[0-9]+]], #7
+; CHECK: add.w [[R2:r[0-9]+]], [[R1]], [[R0]], lsl #2
+; CHECK: bic [[R2]], [[R2]], #4
+; CHECK: lsrs r4, [[R2]], #2
 ; CHECK: bl __chkstk
 ; CHECK: sub.w sp, sp, r4
 

diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
index 9e9a93e19b6a..29336c2f7ffd 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll
@@ -7,12 +7,10 @@
 ; CHECK-LABEL:	main:
 ; CHECK:	ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}}
 ; CHECK-NEXT:	ldr [[R1]], {{\[}}[[R1]], #4]
-; CHECK:	mov [[R4:r[0-9]+]], [[R2]]
-; CHECK-NEXT:	mov [[R5:r[0-9]+]], [[R1]]
-; CHECK:	ldr [[R2]], {{\[}}[[R1]]{{\]}}
-; CHECK-NEXT:	ldr [[R1]], {{\[}}[[R1]], #4]
-; CHECK:	mov [[R6:r[0-9]+]], [[R2]]
-; CHECK-NEXT:	mov [[R7:r[0-9]+]], [[R1]]
+; CHECK:	mov [[R4:r[0-9]+]], [[R1]]
+; CHECK:	ldr [[R5:r[0-9]+]], {{\[}}[[R1]]{{\]}}
+; CHECK-NEXT:	ldr [[R6:r[0-9]+]], {{\[}}[[R1]], #4]
+; CHECK:	mov [[R7:r[0-9]+]], [[R6]]
 
 define arm_aapcs_vfpcc i32 @main() #0 {
 entry:

diff --git a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
index d3696cfe39a8..1bc15dce2081 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-O0.ll
@@ -7,19 +7,21 @@
 
 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_8:
+; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0
+; CHECK-DAG: mov [[NEW:r[0-9]+]], r2
 ; CHECK:     dmb ish
 ; CHECK:     uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldrexb [[OLD:[lr0-9]+]], [r0]
+; CHECK:     ldrexb [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], [[DESIRED]]
 ; CHECK:     bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     strexb [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK:     strexb [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
 ; Materialisation of a boolean is done with sub/clz/lsr
 ; CHECK:     uxtb [[CMP1:r[0-9]+]], [[DESIRED]]
-; CHECK:     sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]]
+; CHECK:     sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]]
 ; CHECK:     clz [[CMP2:r[0-9]+]], [[CMP1]]
 ; CHECK:     lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5
 ; CHECK:     dmb ish
@@ -29,19 +31,21 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
 
 define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_16:
+; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0
+; CHECK-DAG: mov [[NEW:r[0-9]+]], r2
 ; CHECK:     dmb ish
 ; CHECK:     uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldrexh [[OLD:[lr0-9]+]], [r0]
+; CHECK:     ldrexh [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], [[DESIRED]]
 ; CHECK:     bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     strexh [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK:     strexh [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
 ; Materialisation of a boolean is done with sub/clz/lsr
 ; CHECK:     uxth [[CMP1:r[0-9]+]], [[DESIRED]]
-; CHECK:     sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]]
+; CHECK:     sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]]
 ; CHECK:     clz [[CMP2:r[0-9]+]], [[CMP1]]
 ; CHECK:     lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5
 ; CHECK:     dmb ish
@@ -51,13 +55,15 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
 
 define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_32:
+; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0
+; CHECK-DAG: mov [[NEW:r[0-9]+]], r2
 ; CHECK:     dmb ish
 ; CHECK-NOT:     uxt
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldrex [[OLD:r[0-9]+]], [r0]
+; CHECK:     ldrex [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLD]], [[DESIRED]]
 ; CHECK:     bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     strex [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK:     strex [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
@@ -72,14 +78,15 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind
 
 define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
 ; CHECK-LABEL: test_cmpxchg_64:
+; CHECK:     mov [[ADDR:r[0-9]+]], r0
 ; CHECK:     dmb ish
 ; CHECK-NOT: uxt
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
+; CHECK:     ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLDLO]], r6
 ; CHECK:     cmpeq [[OLDHI]], r7
 ; CHECK:     bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     strexd [[STATUS:[lr0-9]+]], r4, r5, [r0]
+; CHECK:     strexd [[STATUS:[lr0-9]+]], r8, r9, [r1]
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
@@ -90,14 +97,15 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind
 
 define { i64, i1 } @test_nontrivial_args(i64* %addr, i64 %desired, i64 %new) {
 ; CHECK-LABEL: test_nontrivial_args:
+; CHECK:     mov [[ADDR:r[0-9]+]], r0
 ; CHECK:     dmb ish
 ; CHECK-NOT: uxt
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
-; CHECK:     ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
+; CHECK:     ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp [[OLDLO]], {{r[0-9]+}}
 ; CHECK:     cmpeq [[OLDHI]], {{r[0-9]+}}
 ; CHECK:     bne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK:     strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r0]
+; CHECK:     strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}}
 ; CHECK:     cmp{{(\.w)?}} [[STATUS]], #0
 ; CHECK:     bne [[RETRY]]
 ; CHECK: [[DONE]]:
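
The sub/clz/lsr sequence these checks expect materialises the cmpxchg success bit without a branch. A C sketch of the idiom (clz32 is a local helper because C's __builtin_clz is undefined at 0, whereas ARM's CLZ returns 32 there):

    #include <stdint.h>

    /* ARM CLZ yields 32 for an all-zero input. */
    static unsigned clz32(uint32_t x) {
        return x ? (unsigned)__builtin_clz(x) : 32u;
    }

    /* old == desired  =>  sub == 0, clz == 32, and 32 >> 5 == 1;
     * old != desired  =>  clz <= 31, which shifts down to 0. */
    static unsigned materialize_eq(uint32_t old, uint32_t desired) {
        return clz32(old - desired) >> 5;
    }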

diff --git a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
index d3c5057e3821..a0241d95a7c6 100644
--- a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
+++ b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -14,21 +14,21 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
 ; SOURCE-SCHED: ldr
 ; SOURCE-SCHED: ldr
-; SOURCE-SCHED: add
 ; SOURCE-SCHED: ldr
-; SOURCE-SCHED: add
 ; SOURCE-SCHED: ldr
-; SOURCE-SCHED: add
 ; SOURCE-SCHED: ldr
 ; SOURCE-SCHED: add
+; SOURCE-SCHED: add
+; SOURCE-SCHED: add
+; SOURCE-SCHED: add
+; SOURCE-SCHED: ldr
 ; SOURCE-SCHED: str
 ; SOURCE-SCHED: str
 ; SOURCE-SCHED: str
 ; SOURCE-SCHED: str
-; SOURCE-SCHED: ldr
 ; SOURCE-SCHED: bl
-; SOURCE-SCHED: add
 ; SOURCE-SCHED: ldr
+; SOURCE-SCHED: add
 ; SOURCE-SCHED: cmp
 ; SOURCE-SCHED: bne
   %i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ]

diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
index 8b31e7a51d51..1c9ffb1775aa 100644
--- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
@@ -6,8 +6,7 @@
 ; CHECK: DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset]
-; CHECK-NEXT:    [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18
-; CHECK-NEXT:    [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18
+; CHECK-NEXT:    [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18)
 ; CHECK-NEXT: DW_AT_name {{.*}} "mydata"
 
 ; Radar 9331779

diff --git a/llvm/test/CodeGen/ARM/fast-isel-call.ll b/llvm/test/CodeGen/ARM/fast-isel-call.ll
index 9c313c727aee..293c268c5359 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-call.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-call.ll
@@ -41,38 +41,31 @@ define void @foo(i8 %a, i16 %b) nounwind {
 ; ARM: foo
 ; THUMB: foo
 ;; Materialize i1 1
-; ARM: movw r2, #1
+; ARM: movw [[REG0:r[0-9]+]], #1
+; THUMB: movs [[REG0:r[0-9]+]], #1
 ;; zero-ext
-; ARM: and r2, r2, #1
-; THUMB: and r2, r2, #1
+; ARM: and [[REG1:r[0-9]+]], [[REG0]], #1
+; THUMB: and [[REG1:r[0-9]+]], [[REG0]], #1
   %1 = call i32 @t0(i1 zeroext 1)
-; ARM: sxtb	r2, r1
-; ARM: mov r0, r2
-; THUMB: sxtb	r2, r1
-; THUMB: mov r0, r2
+; ARM: sxtb	r0, {{r[0-9]+}}
+; THUMB: sxtb	r0, {{r[0-9]+}}
   %2 = call i32 @t1(i8 signext %a)
-; ARM: and	r2, r1, #255
-; ARM: mov r0, r2
-; THUMB: and	r2, r1, #255
-; THUMB: mov r0, r2
+; ARM: and	r0, {{r[0-9]+}}, #255
+; THUMB: and	r0, {{r[0-9]+}}, #255
   %3 = call i32 @t2(i8 zeroext %a)
-; ARM: sxth	r2, r1
-; ARM: mov r0, r2
-; THUMB: sxth	r2, r1
-; THUMB: mov r0, r2
+; ARM: sxth	r0, {{r[0-9]+}}
+; THUMB: sxth	r0, {{r[0-9]+}}
   %4 = call i32 @t3(i16 signext %b)
-; ARM: uxth	r2, r1
-; ARM: mov r0, r2
-; THUMB: uxth	r2, r1
-; THUMB: mov r0, r2
+; ARM: uxth	r0, {{r[0-9]+}}
+; THUMB: uxth	r0, {{r[0-9]+}}
   %5 = call i32 @t4(i16 zeroext %b)
 
 ;; A few tests to check materialization
 ;; Note: i1 1 was materialized with t1 call
-; ARM: movw r1, #255
+; ARM: movw {{r[0-9]+}}, #255
 %6 = call i32 @t2(i8 zeroext 255)
-; ARM: movw r1, #65535
-; THUMB: movw r1, #65535
+; ARM: movw {{r[0-9]+}}, #65535
+; THUMB: movw {{r[0-9]+}}, #65535
 %7 = call i32 @t4(i16 zeroext 65535)
   ret void
 }
@@ -112,10 +105,9 @@ entry:
 ; ARM: bl {{_?}}bar
 ; ARM-LONG-LABEL: @t10
 
-; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
-; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
-; ARM-LONG-MACHO: str     [[R]], [r7, [[SLOT:#[-0-9]+]]]           @ 4-byte Spill
-; ARM-LONG-MACHO: ldr     [[R:l?r[0-9]*]], [r7, [[SLOT]]]           @ 4-byte Reload
+; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; ARM-LONG-MACHO: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}}
+; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], {{\[}}[[R1]]]
 
 ; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar
 ; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
@@ -138,11 +130,9 @@ entry:
 ; THUMB-DAG: str.w [[R4]], [sp, #4]
 ; THUMB: bl {{_?}}bar
 ; THUMB-LONG-LABEL: @t10
-; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
-; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}}
-; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}}
-; THUMB-LONG: str     [[R]], [sp, [[SLOT:#[-0-9]+]]]           @ 4-byte Spill
-; THUMB-LONG: ldr.w   [[R:l?r[0-9]*]], [sp, [[SLOT]]]           @ 4-byte Reload
+; THUMB-LONG: {{(movw)|(ldr.n)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}}
+; THUMB-LONG: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}}
+; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], {{\[}}[[R1]]{{\]}}
 ; THUMB-LONG: blx [[R]]
   %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
   ret i32 0

diff --git a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
index b308c4482d27..bda4c6d47237 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -55,16 +55,13 @@ define void @t2() nounwind ssp {
 
 ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr r0, [r0]
+; ARM-MACHO: ldr [[REG1:r[0-9]+]], [r0]
 
-; ARM-ELF: movw r0, :lower16:temp
-; ARM-ELF: movt r0, :upper16:temp
+; ARM-ELF: movw [[REG1:r[0-9]+]], :lower16:temp
+; ARM-ELF: movt [[REG1]], :upper16:temp
 
-; ARM: add r1, r0, #4
-; ARM: add r0, r0, #16
-; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
-; ARM: mov r0, r1
-; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
+; ARM: add r0, [[REG1]], #4
+; ARM: add r1, [[REG1]], #16
 ; ARM: movw r2, #17
 ; ARM: bl {{_?}}memcpy
 ; ARM-LONG-LABEL: t2:
@@ -80,12 +77,9 @@ define void @t2() nounwind ssp {
 ; THUMB-LABEL: t2:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr r0, [r0]
-; THUMB: adds r1, r0, #4
-; THUMB: adds r0, #16
-; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
-; THUMB: mov r0, r1
-; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
+; THUMB: ldr [[REG1:r[0-9]+]], [r0]
+; THUMB: adds r0, [[REG1]], #4
+; THUMB: adds r1, #16
 ; THUMB: movs r2, #17
 ; THUMB: bl {{_?}}memcpy
 ; THUMB-LONG-LABEL: t2:
@@ -104,15 +98,14 @@ define void @t3() nounwind ssp {
 
 ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr r0, [r0]
+; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
 
-; ARM-ELF: movw r0, :lower16:temp
-; ARM-ELF: movt r0, :upper16:temp
+; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
+; ARM-ELF: movt [[REG0]], :upper16:temp
 
 
-; ARM: add r1, r0, #4
-; ARM: add r0, r0, #16
-; ARM: mov r0, r1
+; ARM: add r0, [[REG0]], #4
+; ARM: add r1, [[REG0]], #16
 ; ARM: movw r2, #10
 ; ARM: bl {{_?}}memmove
 ; ARM-LONG-LABEL: t3:
@@ -128,12 +121,9 @@ define void @t3() nounwind ssp {
 ; THUMB-LABEL: t3:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr r0, [r0]
-; THUMB: adds r1, r0, #4
-; THUMB: adds r0, #16
-; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
-; THUMB: mov r0, r1
-; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
+; THUMB: ldr [[REG1:r[0-9]+]], [r0]
+; THUMB: adds r0, [[REG1]], #4
+; THUMB: adds r1, #16
 ; THUMB: movs r2, #10
 ; THUMB: bl {{_?}}memmove
 ; THUMB-LONG-LABEL: t3:
@@ -150,28 +140,28 @@ define void @t4() nounwind ssp {
 
 ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr r0, [r0]
+; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
 
-; ARM-ELF: movw r0, :lower16:temp
-; ARM-ELF: movt r0, :upper16:temp
+; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
+; ARM-ELF: movt [[REG0]], :upper16:temp
 
-; ARM: ldr r1, [r0, #16]
-; ARM: str r1, [r0, #4]
-; ARM: ldr r1, [r0, #20]
-; ARM: str r1, [r0, #8]
-; ARM: ldrh r1, [r0, #24]
-; ARM: strh r1, [r0, #12]
+; ARM: ldr [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
+; ARM: str [[REG1]], {{\[}}[[REG0]], #4]
+; ARM: ldr [[REG2:r[0-9]+]], {{\[}}[[REG0]], #20]
+; ARM: str [[REG2]], {{\[}}[[REG0]], #8]
+; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #24]
+; ARM: strh [[REG3]], {{\[}}[[REG0]], #12]
 ; ARM: bx lr
 ; THUMB-LABEL: t4:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr r0, [r0]
-; THUMB: ldr r1, [r0, #16]
-; THUMB: str r1, [r0, #4]
-; THUMB: ldr r1, [r0, #20]
-; THUMB: str r1, [r0, #8]
-; THUMB: ldrh r1, [r0, #24]
-; THUMB: strh r1, [r0, #12]
+; THUMB: ldr [[REG1:r[0-9]+]], [r0]
+; THUMB: ldr [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16]
+; THUMB: str [[REG2]], {{\[}}[[REG1]], #4]
+; THUMB: ldr [[REG3:r[0-9]+]], {{\[}}[[REG1]], #20]
+; THUMB: str [[REG3]], {{\[}}[[REG1]], #8]
+; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #24]
+; THUMB: strh [[REG4]], {{\[}}[[REG1]], #12]
 ; THUMB: bx lr
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
@@ -184,36 +174,36 @@ define void @t5() nounwind ssp {
 
 ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr r0, [r0]
-
-; ARM-ELF: movw r0, :lower16:temp
-; ARM-ELF: movt r0, :upper16:temp
-
-; ARM: ldrh r1, [r0, #16]
-; ARM: strh r1, [r0, #4]
-; ARM: ldrh r1, [r0, #18]
-; ARM: strh r1, [r0, #6]
-; ARM: ldrh r1, [r0, #20]
-; ARM: strh r1, [r0, #8]
-; ARM: ldrh r1, [r0, #22]
-; ARM: strh r1, [r0, #10]
-; ARM: ldrh r1, [r0, #24]
-; ARM: strh r1, [r0, #12]
+; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
+
+; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
+; ARM-ELF: movt [[REG0]], :upper16:temp
+
+; ARM: ldrh [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
+; ARM: strh [[REG1]], {{\[}}[[REG0]], #4]
+; ARM: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG0]], #18]
+; ARM: strh [[REG2]], {{\[}}[[REG0]], #6]
+; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #20]
+; ARM: strh [[REG3]], {{\[}}[[REG0]], #8]
+; ARM: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG0]], #22]
+; ARM: strh [[REG4]], {{\[}}[[REG0]], #10]
+; ARM: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG0]], #24]
+; ARM: strh [[REG5]], {{\[}}[[REG0]], #12]
 ; ARM: bx lr
 ; THUMB-LABEL: t5:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr r0, [r0]
-; THUMB: ldrh r1, [r0, #16]
-; THUMB: strh r1, [r0, #4]
-; THUMB: ldrh r1, [r0, #18]
-; THUMB: strh r1, [r0, #6]
-; THUMB: ldrh r1, [r0, #20]
-; THUMB: strh r1, [r0, #8]
-; THUMB: ldrh r1, [r0, #22]
-; THUMB: strh r1, [r0, #10]
-; THUMB: ldrh r1, [r0, #24]
-; THUMB: strh r1, [r0, #12]
+; THUMB: ldr [[REG1:r[0-9]+]], [r0]
+; THUMB: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16]
+; THUMB: strh [[REG2]], {{\[}}[[REG1]], #4]
+; THUMB: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG1]], #18]
+; THUMB: strh [[REG3]], {{\[}}[[REG1]], #6]
+; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #20]
+; THUMB: strh [[REG4]], {{\[}}[[REG1]], #8]
+; THUMB: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG1]], #22]
+; THUMB: strh [[REG5]], {{\[}}[[REG1]], #10]
+; THUMB: ldrh [[REG6:r[0-9]+]], {{\[}}[[REG1]], #24]
+; THUMB: strh [[REG6]], {{\[}}[[REG1]], #12]
 ; THUMB: bx lr
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
@@ -224,56 +214,56 @@ define void @t6() nounwind ssp {
 
 ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
 ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; ARM-MACHO: ldr r0, [r0]
-
-; ARM-ELF: movw r0, :lower16:temp
-; ARM-ELF: movt r0, :upper16:temp
-
-; ARM: ldrb r1, [r0, #16]
-; ARM: strb r1, [r0, #4]
-; ARM: ldrb r1, [r0, #17]
-; ARM: strb r1, [r0, #5]
-; ARM: ldrb r1, [r0, #18]
-; ARM: strb r1, [r0, #6]
-; ARM: ldrb r1, [r0, #19]
-; ARM: strb r1, [r0, #7]
-; ARM: ldrb r1, [r0, #20]
-; ARM: strb r1, [r0, #8]
-; ARM: ldrb r1, [r0, #21]
-; ARM: strb r1, [r0, #9]
-; ARM: ldrb r1, [r0, #22]
-; ARM: strb r1, [r0, #10]
-; ARM: ldrb r1, [r0, #23]
-; ARM: strb r1, [r0, #11]
-; ARM: ldrb r1, [r0, #24]
-; ARM: strb r1, [r0, #12]
-; ARM: ldrb r1, [r0, #25]
-; ARM: strb r1, [r0, #13]
+; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0]
+
+; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp
+; ARM-ELF: movt [[REG0]], :upper16:temp
+
+; ARM: ldrb [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16]
+; ARM: strb [[REG1]], {{\[}}[[REG0]], #4]
+; ARM: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #17]
+; ARM: strb [[REG2]], {{\[}}[[REG0]], #5]
+; ARM: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #18]
+; ARM: strb [[REG3]], {{\[}}[[REG0]], #6]
+; ARM: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #19]
+; ARM: strb [[REG4]], {{\[}}[[REG0]], #7]
+; ARM: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #20]
+; ARM: strb [[REG5]], {{\[}}[[REG0]], #8]
+; ARM: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #21]
+; ARM: strb [[REG6]], {{\[}}[[REG0]], #9]
+; ARM: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #22]
+; ARM: strb [[REG7]], {{\[}}[[REG0]], #10]
+; ARM: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #23]
+; ARM: strb [[REG8]], {{\[}}[[REG0]], #11]
+; ARM: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #24]
+; ARM: strb [[REG9]], {{\[}}[[REG0]], #12]
+; ARM: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #25]
+; ARM: strb [[REG10]], {{\[}}[[REG0]], #13]
 ; ARM: bx lr
 ; THUMB-LABEL: t6:
 ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
 ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
-; THUMB: ldr r0, [r0]
-; THUMB: ldrb r1, [r0, #16]
-; THUMB: strb r1, [r0, #4]
-; THUMB: ldrb r1, [r0, #17]
-; THUMB: strb r1, [r0, #5]
-; THUMB: ldrb r1, [r0, #18]
-; THUMB: strb r1, [r0, #6]
-; THUMB: ldrb r1, [r0, #19]
-; THUMB: strb r1, [r0, #7]
-; THUMB: ldrb r1, [r0, #20]
-; THUMB: strb r1, [r0, #8]
-; THUMB: ldrb r1, [r0, #21]
-; THUMB: strb r1, [r0, #9]
-; THUMB: ldrb r1, [r0, #22]
-; THUMB: strb r1, [r0, #10]
-; THUMB: ldrb r1, [r0, #23]
-; THUMB: strb r1, [r0, #11]
-; THUMB: ldrb r1, [r0, #24]
-; THUMB: strb r1, [r0, #12]
-; THUMB: ldrb r1, [r0, #25]
-; THUMB: strb r1, [r0, #13]
+; THUMB: ldr [[REG0:r[0-9]+]], [r0]
+; THUMB: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #16]
+; THUMB: strb [[REG2]], {{\[}}[[REG0]], #4]
+; THUMB: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #17]
+; THUMB: strb [[REG3]], {{\[}}[[REG0]], #5]
+; THUMB: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #18]
+; THUMB: strb [[REG4]], {{\[}}[[REG0]], #6]
+; THUMB: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #19]
+; THUMB: strb [[REG5]], {{\[}}[[REG0]], #7]
+; THUMB: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #20]
+; THUMB: strb [[REG6]], {{\[}}[[REG0]], #8]
+; THUMB: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #21]
+; THUMB: strb [[REG7]], {{\[}}[[REG0]], #9]
+; THUMB: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #22]
+; THUMB: strb [[REG8]], {{\[}}[[REG0]], #10]
+; THUMB: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #23]
+; THUMB: strb [[REG9]], {{\[}}[[REG0]], #11]
+; THUMB: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #24]
+; THUMB: strb [[REG10]], {{\[}}[[REG0]], #12]
+; THUMB: ldrb [[REG11:r[0-9]+]], {{\[}}[[REG0]], #25]
+; THUMB: strb [[REG11]], {{\[}}[[REG0]], #13]
 ; THUMB: bx lr
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
   ret void
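
The t4/t5/t6 checks above all copy the same 10 bytes; only the alignment differs, and with it the access width fast-isel can use: align 4 becomes ldr/ldr/ldrh, align 2 five ldrh/strh pairs, align 1 ten ldrb/strb pairs. A sketch of that chunking rule (illustrative only, not the actual FastISel code):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void copy_chunked(uint8_t *dst, const uint8_t *src,
                             size_t n, size_t align) {
        while (n) {
            /* Widest chunk allowed by both alignment and bytes left. */
            size_t chunk = (align >= 4 && n >= 4) ? 4
                         : (align >= 2 && n >= 2) ? 2 : 1;
            memcpy(dst, src, chunk);  /* one load/store pair in codegen */
            dst += chunk; src += chunk; n -= chunk;
        }
    }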

diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index f24100b36db9..95942c271c9c 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -2,7 +2,7 @@
 
 define i32 @t1(i32* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t1
+; THUMB-LABEL: t1:
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
   %0 = load i32, i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0, #-4]
@@ -11,7 +11,7 @@ entry:
 
 define i32 @t2(i32* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t2
+; THUMB-LABEL: t2:
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
   %0 = load i32, i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0, #-252]
@@ -20,7 +20,7 @@ entry:
 
 define i32 @t3(i32* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t3
+; THUMB-LABEL: t3:
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
   %0 = load i32, i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0]
@@ -29,7 +29,7 @@ entry:
 
 define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t4
+; THUMB-LABEL: t4:
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
   %0 = load i16, i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
@@ -38,7 +38,7 @@ entry:
 
 define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t5
+; THUMB-LABEL: t5:
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
   %0 = load i16, i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
@@ -47,7 +47,7 @@ entry:
 
 define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t6
+; THUMB-LABEL: t6:
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
   %0 = load i16, i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0]
@@ -56,7 +56,7 @@ entry:
 
 define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t7
+; THUMB-LABEL: t7:
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
   %0 = load i8, i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
@@ -65,7 +65,7 @@ entry:
 
 define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t8
+; THUMB-LABEL: t8:
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
   %0 = load i8, i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
@@ -74,7 +74,7 @@ entry:
 
 define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
 entry:
-; THUMB: t9
+; THUMB-LABEL: t9:
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
   %0 = load i8, i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0]
@@ -83,81 +83,96 @@ entry:
 
 define void @t10(i32* nocapture %ptr) nounwind {
 entry:
-; THUMB: t10
+; THUMB-LABEL: t10:
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
   store i32 0, i32* %add.ptr, align 4
-; THUMB: str r{{[0-9]}}, [r0, #-4]
+; THUMB: mov [[REG:r[0-9]+]], r0
+; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-4]
   ret void
 }
 
 define void @t11(i32* nocapture %ptr) nounwind {
 entry:
-; THUMB: t11
+; THUMB-LABEL: t11:
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
   store i32 0, i32* %add.ptr, align 4
-; THUMB: str r{{[0-9]}}, [r0, #-252]
+; THUMB: mov [[REG:r[0-9]+]], r0
+; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-252]
   ret void
 }
 
 define void @t12(i32* nocapture %ptr) nounwind {
 entry:
-; THUMB: t12
+; THUMB-LABEL: t12:
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
   store i32 0, i32* %add.ptr, align 4
-; THUMB: str r{{[0-9]}}, [r0]
+; THUMB: movw [[REG:r[0-9]+]], #65280
+; THUMB: movt [[REG]], #65535
+; THUMB: add [[REG]], r0
+; THUMB: str r{{[0-9]}}, {{\[}}[[REG]]]
   ret void
 }
 
 define void @t13(i16* nocapture %ptr) nounwind {
 entry:
-; THUMB: t13
+; THUMB-LABEL: t13:
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
   store i16 0, i16* %add.ptr, align 2
-; THUMB: strh r{{[0-9]}}, [r0, #-2]
+; THUMB: mov [[REG:r[0-9]+]], r0
+; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-2]
   ret void
 }
 
 define void @t14(i16* nocapture %ptr) nounwind {
 entry:
-; THUMB: t14
+; THUMB-LABEL: t14:
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
   store i16 0, i16* %add.ptr, align 2
-; THUMB: strh r{{[0-9]}}, [r0, #-254]
+; THUMB: mov [[REG:r[0-9]+]], r0
+; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-254]
   ret void
 }
 
 define void @t15(i16* nocapture %ptr) nounwind {
 entry:
-; THUMB: t15
+; THUMB-LABEL: t15:
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
   store i16 0, i16* %add.ptr, align 2
-; THUMB: strh r{{[0-9]}}, [r0]
+; THUMB: movw [[REG:r[0-9]+]], #65280
+; THUMB: movt [[REG]], #65535
+; THUMB: add [[REG]], r0
+; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]]]
   ret void
 }
 
 define void @t16(i8* nocapture %ptr) nounwind {
 entry:
-; THUMB: t16
+; THUMB-LABEL: t16:
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
   store i8 0, i8* %add.ptr, align 1
-; THUMB: strb r{{[0-9]}}, [r0, #-1]
+; THUMB: mov [[REG:r[0-9]+]], r0
+; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-1]
   ret void
 }
 
 define void @t17(i8* nocapture %ptr) nounwind {
 entry:
-; THUMB: t17
+; THUMB-LABEL: t17:
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
   store i8 0, i8* %add.ptr, align 1
-; THUMB: strb r{{[0-9]}}, [r0, #-255]
+; THUMB: mov [[REG:r[0-9]+]], r0
+; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-255]
   ret void
 }
 
 define void @t18(i8* nocapture %ptr) nounwind {
 entry:
-; THUMB: t18
+; THUMB-LABEL: t18:
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
   store i8 0, i8* %add.ptr, align 1
-; THUMB: strb r{{[0-9]}}, [r0]
+; THUMB: movw [[REG:r[0-9]+]], #65280
+; THUMB: movt [[REG]], #65535
+; THUMB: add [[REG]], r0
+; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]]]
   ret void
 }
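
In t12, t15, and t18 above the element index reaches a byte offset of -256, one past the -255 limit of the negative immediate addressing forms, so the offset is materialised with movw/movt and added to the base pointer. A worked version of that constant (the function name is illustrative):

    #include <stdint.h>

    int32_t materialized_offset(void) {
        uint32_t imm = 65280u;        /* movw [[REG]], #65280 -> 0x0000FF00 */
        imm |= 65535u << 16;          /* movt [[REG]], #65535 -> 0xFFFFFF00 */
        return (int32_t)imm;          /* == -256 */
    }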

diff --git a/llvm/test/CodeGen/ARM/fast-isel-select.ll b/llvm/test/CodeGen/ARM/fast-isel-select.ll
index 0da63499f302..70987422dfde 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-select.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-select.ll
@@ -21,14 +21,12 @@ entry:
 define i32 @t2(i1 %c, i32 %a) nounwind readnone {
 entry:
 ; ARM: t2
-; ARM: tst r0, #1
-; ARM: moveq r{{[1-9]}}, #20
-; ARM: mov r0, r{{[1-9]}}
+; ARM: tst {{r[0-9]+}}, #1
+; ARM: moveq {{r[0-9]+}}, #20
 ; THUMB-LABEL: t2
-; THUMB: tst.w r0, #1
+; THUMB: tst.w {{r[0-9]+}}, #1
 ; THUMB: it eq
-; THUMB: moveq r{{[1-9]}}, #20
-; THUMB: mov r0, r{{[1-9]}}
+; THUMB: moveq {{r[0-9]+}}, #20
   %0 = select i1 %c, i32 %a, i32 20
   ret i32 %0
 }
@@ -43,7 +41,7 @@ entry:
 ; THUMB: tst.w r0, #1
 ; THUMB: it ne
 ; THUMB: movne r2, r1
-; THUMB: add.w r0, r2, r1
+; THUMB: adds r0, r2, r1
   %0 = select i1 %c, i32 %a, i32 %b
   %1 = add i32 %0, %a
   ret i32 %1
@@ -67,14 +65,12 @@ entry:
 define i32 @t5(i1 %c, i32 %a) nounwind readnone {
 entry:
 ; ARM: t5
-; ARM: tst r0, #1
-; ARM: mvneq r{{[1-9]}}, #1
-; ARM: mov r0, r{{[1-9]}}
+; ARM: tst {{r[0-9]+}}, #1
+; ARM: mvneq {{r[0-9]+}}, #1
 ; THUMB: t5
-; THUMB: tst.w r0, #1
+; THUMB: tst.w {{r[0-9]+}}, #1
 ; THUMB: it eq
-; THUMB: mvneq r{{[1-9]}}, #1
-; THUMB: mov r0, r{{[1-9]}}
+; THUMB: mvneq {{r[0-9]+}}, #1
   %0 = select i1 %c, i32 %a, i32 -2
   ret i32 %0
 }
@@ -83,14 +79,12 @@ entry:
 define i32 @t6(i1 %c, i32 %a) nounwind readnone {
 entry:
 ; ARM: t6
-; ARM: tst r0, #1
-; ARM: mvneq r{{[1-9]}}, #978944
-; ARM: mov r0, r{{[1-9]}}
+; ARM: tst {{r[0-9]+}}, #1
+; ARM: mvneq {{r[0-9]+}}, #978944
 ; THUMB: t6
-; THUMB: tst.w r0, #1
+; THUMB: tst.w {{r[0-9]+}}, #1
 ; THUMB: it eq
-; THUMB: mvneq r{{[1-9]}}, #978944
-; THUMB: mov r0, r{{[1-9]}}
+; THUMB: mvneq {{r[0-9]+}}, #978944
   %0 = select i1 %c, i32 %a, i32 -978945
   ret i32 %0
 }

diff --git a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
index ffc3d9a05d88..3a9011ba622a 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -17,26 +17,24 @@ entry:
   %4 = load i32, i32* %n, align 4
 ; ARM: VarArg
 ; ARM: mov [[FP:r[0-9]+]], sp
-; ARM: sub sp, sp, #{{(36|40)}}
+; ARM: sub sp, sp, #32
 ; ARM: ldr r1, {{\[}}[[FP]], #-4]
 ; ARM: ldr r2, {{\[}}[[FP]], #-8]
 ; ARM: ldr r3, {{\[}}[[FP]], #-12]
-; ARM: ldr [[Ra:r[0-9]+]], {{\[}}[[FP]], #-16]
-; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #{{(16|20)}}]
-; ARM: movw [[Rc:[lr]+[0-9]*]], #5
-;   Ra got spilled
-; ARM: mov r0, [[Rc]]
-; ARM: str {{.*}}, [sp]
+; ARM: ldr [[Ra:r[0-9]+|lr]], [sp, #16]
+; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12]
+; ARM: movw r0, #5
+; ARM: str [[Ra]], [sp]
 ; ARM: str [[Rb]], [sp, #4]
 ; ARM: bl {{_?CallVariadic}}
-; THUMB: sub sp, #{{36}}
-; THUMB: ldr r1, [sp, #32]
-; THUMB: ldr r2, [sp, #28]
-; THUMB: ldr r3, [sp, #24]
-; THUMB: ldr {{[a-z0-9]+}}, [sp, #20]
-; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16]
-; THUMB: str.w {{[a-z0-9]+}}, [sp]
-; THUMB: str.w {{[a-z0-9]+}}, [sp, #4]
+; THUMB: sub sp, #32
+; THUMB: ldr r1, [sp, #28]
+; THUMB: ldr r2, [sp, #24]
+; THUMB: ldr r3, [sp, #20]
+; THUMB: ldr.w [[Ra:r[0-9]+]], [sp, #16]
+; THUMB: ldr.w [[Rb:r[0-9]+]], [sp, #12]
+; THUMB: str.w [[Ra]], [sp]
+; THUMB: str.w [[Rb]], [sp, #4]
 ; THUMB: bl {{_?}}CallVariadic
   %call = call i32 (i32, ...) @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
   store i32 %call, i32* %tmp, align 4

diff --git a/llvm/test/CodeGen/ARM/ldrd.ll b/llvm/test/CodeGen/ARM/ldrd.ll
index b4325c78dbf2..2bba84141380 100644
--- a/llvm/test/CodeGen/ARM/ldrd.ll
+++ b/llvm/test/CodeGen/ARM/ldrd.ll
@@ -81,11 +81,12 @@ return:                                           ; preds = %bb, %entry
 ; CHECK-LABEL: Func1:
 define void @Func1() nounwind ssp "frame-pointer"="all" {
 entry:
-; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
-; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}}
+; A8: movw [[BASER:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}}
+; A8: movt [[BASER]], :upper16:{{.*}}TestVar{{.*}}
+; A8: ldr [[BASE:r[0-9]+]], {{\[}}[[BASER]]]
 ; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4]
-; A8-NEXT: add [[FIELD1]], [[FIELD2]]
-; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
+; A8-NEXT: add [[FIELD2]], [[FIELD1]]
+; A8-NEXT: str [[FIELD2]], {{\[}}[[BASE]]{{\]}}
 ; CONSERVATIVE-NOT: ldrd
   %orig_blocks = alloca [256 x i16], align 2
   %0 = bitcast [256 x i16]* %orig_blocks to i8*
   call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind

diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
index 529775df5fd7..67ea37aa3503 100644
--- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
@@ -14,17 +14,17 @@ define i32 @vec_to_int() {
 ; CHECK-NEXT:    ldr r0, [r0]
 ; CHECK-NEXT:    @ implicit-def: $d17
 ; CHECK-NEXT:    vmov.32 d17[0], r0
-; CHECK-NEXT:    vrev32.16 d17, d17
+; CHECK-NEXT:    vrev32.16 d18, d17
 ; CHECK-NEXT:    vrev16.8 d16, d16
-; CHECK-NEXT:    vmov.f64 d18, d16
-; CHECK-NEXT:    vmov.f64 d19, d17
-; CHECK-NEXT:    vstmia sp, {d18, d19} @ 16-byte Spill
+; CHECK-NEXT:    @ kill: def $d16 killed $d16 def $q8
+; CHECK-NEXT:    vmov.f64 d17, d18
+; CHECK-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
 ; CHECK-NEXT:    b .LBB0_1
 ; CHECK-NEXT:  .LBB0_1: @ %bb.1
 ; CHECK-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; CHECK-NEXT:    vrev32.16 q9, q8
-; CHECK-NEXT:    @ kill: def $d19 killed $d19 killed $q9
-; CHECK-NEXT:    vmov.32 r0, d19[0]
+; CHECK-NEXT:    vrev32.16 q8, q8
+; CHECK-NEXT:    vmov.f64 d16, d17
+; CHECK-NEXT:    vmov.32 r0, d16[0]
 ; CHECK-NEXT:    add sp, sp, #28
 ; CHECK-NEXT:    pop {r4}
 ; CHECK-NEXT:    bx lr
@@ -41,15 +41,15 @@ bb.1:
 define i16 @int_to_vec(i80 %in) {
 ; CHECK-LABEL: int_to_vec:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    mov r3, r1
-; CHECK-NEXT:    mov r12, r0
+; CHECK-NEXT:    @ kill: def $r2 killed $r1
+; CHECK-NEXT:    @ kill: def $r2 killed $r0
 ; CHECK-NEXT:    lsl r0, r0, #16
 ; CHECK-NEXT:    orr r0, r0, r1, lsr #16
-; CHECK-NEXT:    @ implicit-def: $d16
-; CHECK-NEXT:    vmov.32 d16[0], r0
-; CHECK-NEXT:    @ implicit-def: $q9
-; CHECK-NEXT:    vmov.f64 d18, d16
-; CHECK-NEXT:    vrev32.16 q8, q9
+; CHECK-NEXT:    @ implicit-def: $d18
+; CHECK-NEXT:    vmov.32 d18[0], r0
+; CHECK-NEXT:    @ implicit-def: $q8
+; CHECK-NEXT:    vmov.f64 d16, d18
+; CHECK-NEXT:    vrev32.16 q8, q8
 ; CHECK-NEXT:    @ kill: def $d16 killed $d16 killed $q8
 ; CHECK-NEXT:    vmov.u16 r0, d16[0]
 ; CHECK-NEXT:    bx lr

diff --git a/llvm/test/CodeGen/ARM/pr47454.ll b/llvm/test/CodeGen/ARM/pr47454.ll
index d36a29c4e77c..399de44ec731 100644
--- a/llvm/test/CodeGen/ARM/pr47454.ll
+++ b/llvm/test/CodeGen/ARM/pr47454.ll
@@ -16,23 +16,23 @@ define internal fastcc void @main() {
 ; CHECK-NEXT:    ldrh r0, [r11, #-2]
 ; CHECK-NEXT:    bl __gnu_h2f_ieee
 ; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    vstr s0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    vstr s0, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    bl getConstant
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl __gnu_h2f_ieee
 ; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
-; CHECK-NEXT:    vldr s0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    vmov r1, s0
-; CHECK-NEXT:    str r0, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    uxth r1, r1
+; CHECK-NEXT:    vmov s0, r1
 ; CHECK-NEXT:    uxth r0, r0
-; CHECK-NEXT:    vmov s0, r0
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    uxth r1, r0
-; CHECK-NEXT:    vmov s1, r1
+; CHECK-NEXT:    vmov s1, r0
 ; CHECK-NEXT:    bl isEqual
 ; CHECK-NEXT:    mov sp, r11
 ; CHECK-NEXT:    pop {r11, pc}

diff --git a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
index 02ee9c067f22..f2d9a5c0f7fd 100644
--- a/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
+++ b/llvm/test/CodeGen/ARM/stack-guard-reassign.ll
@@ -3,11 +3,12 @@
 ; Verify that the offset assigned to the stack protector is at the top of the
 ; frame, covering the locals.
 ; CHECK-LABEL: fn:
-; CHECK:      sub sp, sp, #32
+; CHECK:      sub sp, sp, #24
 ; CHECK-NEXT: sub sp, sp, #65536
 ; CHECK-NEXT: ldr r1, .LCPI0_0
-; CHECK-NEXT: ldr r2, [r1]
+; CHECK-NEXT: str r1, [sp, #8]
+; CHECK-NEXT: ldr r1, [r1]
 ; CHECK-NEXT: add lr, sp, #65536
-; CHECK-NEXT: str r2, [lr, #28]
+; CHECK-NEXT: str r1, [lr, #20]
 ; CHECK: .LCPI0_0:
 ; CHECK-NEXT: .long __stack_chk_guard

diff --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll
index d96bc0249b42..7968230ccab2 100644
--- a/llvm/test/CodeGen/ARM/swifterror.ll
+++ b/llvm/test/CodeGen/ARM/swifterror.ll
@@ -21,9 +21,9 @@ define float @foo(%swift_error** swifterror %error_ptr_ref) {
 ; CHECK-O0: mov r{{.*}}, #16
 ; CHECK-O0: malloc
 ; CHECK-O0: mov [[ID2:r[0-9]+]], r0
-; CHECK-O0: mov [[ID:r[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [r0, #8]
 ; CHECK-O0: mov r8, [[ID2]]
+; CHECK-O0: mov [[ID:r[0-9]+]], #1
+; CHECK-O0: strb [[ID]], {{\[}}[[ID2]], #8]
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -49,16 +49,16 @@ define float @caller(i8* %error_ref) {
 ; CHECK-O0-LABEL: caller:
 ; spill r0
 ; CHECK-O0-DAG: mov r8, #0
-; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]]
+; CHECK-O0-DAG: str r0, [sp[[SLOT:(, #[0-9]+)?]]]
 ; CHECK-O0: bl {{.*}}foo
 ; CHECK-O0: mov [[TMP:r[0-9]+]], r8
-; CHECK-O0: str [[TMP]], [sp]
+; CHECK-O0: str [[TMP]], [sp[[SLOT2:(, #[0-9]+)?]]]
 ; CHECK-O0: bne
+; CHECK-O0: ldr [[ID:r[0-9]+]], [sp[[SLOT]]]
 ; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
-; CHECK-O0: ldr     [[ID:r[0-9]+]], [sp, [[SLOT]]]
 ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
 ; reload r0
-; CHECK-O0: ldr r0, [sp]
+; CHECK-O0: ldr r0, [sp[[SLOT2]]]
 ; CHECK-O0: free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -98,14 +98,14 @@ define float @caller2(i8* %error_ref) {
 ; CHECK-O0-DAG: mov r8, #0
 ; CHECK-O0: bl {{.*}}foo
 ; CHECK-O0: mov r{{.*}}, r8
-; CHECK-O0: str r0, [sp]
+; CHECK-O0: str r0, [sp{{(, #[0-9]+)?}}]
 ; CHECK-O0: bne
 ; CHECK-O0: ble
-; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
 ; reload r0
 ; CHECK-O0: ldr [[ID:r[0-9]+]],
+; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8]
 ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
-; CHECK-O0: ldr r0, [sp]
+; CHECK-O0: ldr r0, [sp{{(, #[0-9]+)?}}]
 ; CHECK-O0: free
 entry:
   %error_ptr_ref = alloca swifterror %swift_error*
@@ -143,16 +143,15 @@ define float @foo_if(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8]
 
 ; CHECK-O0-LABEL: foo_if:
-; CHECK-O0: cmp r0, #0
 ; spill to stack
 ; CHECK-O0: str r8
+; CHECK-O0: cmp r0, #0
 ; CHECK-O0: beq
 ; CHECK-O0: mov r0, #16
 ; CHECK-O0: malloc
 ; CHECK-O0: mov [[ID:r[0-9]+]], r0
 ; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1
-; CHECK-O0: strb [[ID2]], [r0, #8]
-; CHECK-O0: mov r8, [[ID]]
+; CHECK-O0: strb [[ID2]], {{\[}}[[ID]], #8]
 ; reload from stack
 ; CHECK-O0: ldr r8
 entry:
@@ -233,18 +232,18 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swi
 ; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4]
 
 ; CHECK-O0-LABEL: foo_sret:
-; CHECK-O0: mov r{{.*}}, #16
+; CHECK-O0-DAG: mov r{{.*}}, #16
 ; spill to stack: sret and val1
 ; CHECK-O0-DAG: str r0
 ; CHECK-O0-DAG: str r1
 ; CHECK-O0: malloc
-; CHECK-O0: mov [[ID:r[0-9]+]], #1
-; CHECK-O0: strb [[ID]], [r0, #8]
 ; reload from stack: sret and val1
 ; CHECK-O0: ldr
 ; CHECK-O0: ldr
-; CHECK-O0: str r{{.*}}, [{{.*}}, #4]
-; CHECK-O0: mov r8
+; CHECK-O0-DAG: mov r8
+; CHECK-O0-DAG: mov [[ID:r[0-9]+]], #1
+; CHECK-O0-DAG: strb [[ID]], [{{r[0-9]+}}, #8]
+; CHECK-O0-DAG: str r{{.*}}, [{{.*}}, #4]
 entry:
   %call = call i8* @malloc(i64 16)
   %call.0 = bitcast i8* %call to %swift_error*
@@ -271,16 +270,15 @@ define float @caller3(i8* %error_ref) {
 
 ; CHECK-O0-LABEL: caller3:
 ; CHECK-O0-DAG: mov r8, #0
-; CHECK-O0-DAG: mov r0
 ; CHECK-O0-DAG: mov r1
 ; CHECK-O0: bl {{.*}}foo_sret
 ; CHECK-O0: mov [[ID2:r[0-9]+]], r8
-; CHECK-O0: cmp r8
 ; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]]
+; CHECK-O0: cmp r8
 ; CHECK-O0: bne
 ; Access part of the error object and save it to error_ref
-; CHECK-O0: ldrb [[CODE:r[0-9]+]]
 ; CHECK-O0: ldr [[ID:r[0-9]+]]
+; CHECK-O0: ldrb [[CODE:r[0-9]+]]
 ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]]
 ; CHECK-O0: ldr r0, [sp[[SLOT]]
 ; CHECK-O0: bl {{.*}}free

diff --git a/llvm/test/CodeGen/ARM/thumb-big-stack.ll b/llvm/test/CodeGen/ARM/thumb-big-stack.ll
index 954c12634cff..e5cbb9747a7e 100644
--- a/llvm/test/CodeGen/ARM/thumb-big-stack.ll
+++ b/llvm/test/CodeGen/ARM/thumb-big-stack.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7s-apple-ios"
 ; CHECK: movw [[ADDR:(r[0-9]+|lr)]], #
 ; CHECK-NEXT: add [[ADDR]], sp
 ; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, {{\[}}[[ADDR]]:128]
-define <4 x float> @f(<4 x float> %x, float %val) {
+define <4 x float> @f(<4 x float> %x) {
 entry:
   %.compoundliteral7837 = alloca <4 x float>, align 16
   %.compoundliteral7865 = alloca <4 x float>, align 16
@@ -143,9 +143,9 @@ entry:
   %.compoundliteral13969 = alloca <4 x float>, align 16
   %.compoundliteral13983 = alloca <4 x float>, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40746999A0000000, float 0xC0719B3340000000, float 0xC070B66660000000, float 0xC07404CCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40746999A0000000, float 0xC0719B3340000000, float 0xC070B66660000000, float 0xC07404CCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40701B3340000000, float 0x405B866660000000, float 0xC0763999A0000000, float 4.895000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x40701B3340000000, float 0x405B866660000000, float 0xC0763999A0000000, float 4.895000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -153,17 +153,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add68 = fadd <4 x float> %tmp1, %tmp
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add68, <4 x float>* undef, align 16
+  store <4 x float> %add68, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp2 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add76 = fadd float %val, 0x4074C999A0000000
+  %add76 = fadd float undef, 0x4074C999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp3 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins77 = insertelement <4 x float> %tmp3, float %add76, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins77, <4 x float>* undef, align 16
+  store <4 x float> %vecins77, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp4 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -175,15 +175,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins80 = insertelement <4 x float> %tmp5, float %add79, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins80, <4 x float>* undef, align 16
+  store <4 x float> %vecins80, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40678CCCC0000000, float 0xC03E4CCCC0000000, float -4.170000e+02, float -1.220000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x40678CCCC0000000, float 0xC03E4CCCC0000000, float -4.170000e+02, float -1.220000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp6 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add82 = fadd <4 x float> undef, %tmp6
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add82, <4 x float>* undef, align 16
+  store <4 x float> %add82, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp7 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -195,19 +195,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins85 = insertelement <4 x float> %tmp8, float %add84, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins85, <4 x float>* undef, align 16
+  store <4 x float> %vecins85, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp9 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext86 = extractelement <4 x float> %tmp9, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add93 = fadd float %val, 0xC076C66660000000
+  %add93 = fadd float undef, 0xC076C66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp10 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins94 = insertelement <4 x float> %tmp10, float %add93, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406C2999A0000000, float 8.050000e+01, float 0xC0794999A0000000, float 0xC073E4CCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x406C2999A0000000, float 8.050000e+01, float 0xC0794999A0000000, float 0xC073E4CCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp11 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -223,17 +223,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp14 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins102 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins102 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins102, <4 x float>* undef, align 16
+  store <4 x float> %vecins102, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp15 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add104 = fadd float %val, 0x406AB999A0000000
+  %add104 = fadd float undef, 0x406AB999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp16 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0531999A0000000, float 0xC0737999A0000000, float 0x407CB33340000000, float 0xC06DCCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC0531999A0000000, float 0xC0737999A0000000, float 0x407CB33340000000, float 0xC06DCCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext579 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -243,7 +243,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins581 = insertelement <4 x float> %tmp17, float %add580, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins581, <4 x float>* undef, align 16
+  store <4 x float> %vecins581, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp18 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -251,7 +251,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add583 = fadd float %vecext582, 0x40444CCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp19 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -261,25 +261,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins592 = insertelement <4 x float> undef, float %add591, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins592, <4 x float>* undef, align 16
+  store <4 x float> %vecins592, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp20 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add594 = fadd float %val, 0xC05B466660000000
+  %add594 = fadd float undef, 0xC05B466660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add605 = fadd float %val, 0x407164CCC0000000
+  %add605 = fadd float undef, 0x407164CCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp21 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add616 = fadd float %val, 1.885000e+02
+  %add616 = fadd float undef, 1.885000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp22 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp23 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins620 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins620 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins620, <4 x float>* undef, align 16
+  store <4 x float> %vecins620, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext621 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -287,7 +287,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins623 = insertelement <4 x float> undef, float %add622, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins623, <4 x float>* undef, align 16
+  store <4 x float> %vecins623, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp24 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -299,9 +299,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins626 = insertelement <4 x float> %tmp25, float %add625, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins626, <4 x float>* undef, align 16
+  store <4 x float> %vecins626, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x404D0CCCC0000000, float 3.955000e+02, float 0xC0334CCCC0000000, float 0x40754E6660000000>, <4 x float>* undef
+  store <4 x float> <float 0x404D0CCCC0000000, float 3.955000e+02, float 0xC0334CCCC0000000, float 0x40754E6660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp26 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -309,7 +309,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add628 = fadd <4 x float> %tmp27, %tmp26
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add628, <4 x float>* undef, align 16
+  store <4 x float> %add628, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp28 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -321,7 +321,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins631 = insertelement <4 x float> %tmp29, float %add630, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins631, <4 x float>* undef, align 16
+  store <4 x float> %vecins631, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp30 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -333,7 +333,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins634 = insertelement <4 x float> %tmp31, float %add633, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins634, <4 x float>* undef, align 16
+  store <4 x float> %vecins634, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp32 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -347,13 +347,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp35 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add658 = fadd float %val, 0xC04A4CCCC0000000
+  %add658 = fadd float undef, 0xC04A4CCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext663 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp36 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins665 = insertelement <4 x float> %tmp36, float %val, i32 2
+  %vecins665 = insertelement <4 x float> %tmp36, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext694 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -363,31 +363,31 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins696 = insertelement <4 x float> %tmp37, float %add695, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins696, <4 x float>* undef, align 16
+  store <4 x float> %vecins696, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC069FCCCC0000000, float 0xC07C6E6660000000, float 0x4067E33340000000, float 0x4078DB3340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC069FCCCC0000000, float 0xC07C6E6660000000, float 0x4067E33340000000, float 0x4078DB3340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp38 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext699 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add703 = fadd float %val, 0x4068F33340000000
+  %add703 = fadd float undef, 0x4068F33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins704 = insertelement <4 x float> undef, float %add703, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins704, <4 x float>* undef, align 16
+  store <4 x float> %vecins704, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp39 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp40 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins710 = insertelement <4 x float> %tmp40, float %val, i32 3
+  %vecins710 = insertelement <4 x float> %tmp40, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins710, <4 x float>* undef, align 16
+  store <4 x float> %vecins710, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC05D9999A0000000, float 0x405D6CCCC0000000, float 0x40765CCCC0000000, float 0xC07C64CCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC05D9999A0000000, float 0x405D6CCCC0000000, float 0x40765CCCC0000000, float 0xC07C64CCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp41 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -395,7 +395,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add712 = fadd <4 x float> %tmp42, %tmp41
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add712, <4 x float>* undef, align 16
+  store <4 x float> %add712, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp43 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -403,7 +403,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp44 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins715 = insertelement <4 x float> %tmp44, float %val, i32 0
+  %vecins715 = insertelement <4 x float> %tmp44, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp45 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -415,19 +415,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins718 = insertelement <4 x float> %tmp46, float %add717, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins718, <4 x float>* undef, align 16
+  store <4 x float> %vecins718, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp47 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext719 = extractelement <4 x float> %tmp47, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add723 = fadd float %val, 0xC06A6CCCC0000000
+  %add723 = fadd float undef, 0xC06A6CCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins724 = insertelement <4 x float> undef, float %add723, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add726 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext730 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -437,19 +437,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins732 = insertelement <4 x float> %tmp48, float %add731, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins732, <4 x float>* undef, align 16
+  store <4 x float> %vecins732, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp49 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext733 = extractelement <4 x float> %tmp49, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp50 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins738 = insertelement <4 x float> %tmp50, float %val, i32 3
+  %vecins738 = insertelement <4 x float> %tmp50, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406E6CCCC0000000, float 0xC07A766660000000, float 0xC0608CCCC0000000, float 0xC063333340000000>, <4 x float>* undef
+  store <4 x float> <float 0x406E6CCCC0000000, float 0xC07A766660000000, float 0xC0608CCCC0000000, float 0xC063333340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp51 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -465,7 +465,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins743 = insertelement <4 x float> %tmp53, float %add742, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins743, <4 x float>* undef, align 16
+  store <4 x float> %vecins743, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp54 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -473,7 +473,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add754 = fadd <4 x float> %tmp55, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add754, <4 x float>* undef, align 16
+  store <4 x float> %add754, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp56 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -485,7 +485,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins757 = insertelement <4 x float> %tmp57, float %add756, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add765 = fadd float %val, 0x405BA66660000000
+  %add765 = fadd float undef, 0x405BA66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp58 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -501,11 +501,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins771 = insertelement <4 x float> %tmp60, float %add770, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins771, <4 x float>* undef, align 16
+  store <4 x float> %vecins771, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp61 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add776 = fadd float %val, 0xC055F33340000000
+  %add776 = fadd float undef, 0xC055F33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins777 = insertelement <4 x float> undef, float %add776, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -515,7 +515,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add782 = fadd <4 x float> %tmp63, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add782, <4 x float>* undef, align 16
+  store <4 x float> %add782, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp64 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -523,25 +523,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add784 = fadd float %vecext783, -3.455000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07A866660000000, float 0xC05CF999A0000000, float 0xC0757199A0000000, float -3.845000e+02>, <4 x float>* undef
+  store <4 x float> <float 0xC07A866660000000, float 0xC05CF999A0000000, float 0xC0757199A0000000, float -3.845000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add796 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add796, <4 x float>* undef, align 16
+  store <4 x float> %add796, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp65 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add801 = fadd float %val, 3.045000e+02
+  %add801 = fadd float undef, 3.045000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp66 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins802 = insertelement <4 x float> %tmp66, float %add801, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins802, <4 x float>* undef, align 16
+  store <4 x float> %vecins802, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext803 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp67 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -549,7 +549,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add810 = fadd <4 x float> undef, %tmp68
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add810, <4 x float>* undef, align 16
+  store <4 x float> %add810, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp69 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -557,17 +557,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp70 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins813 = insertelement <4 x float> %tmp70, float %val, i32 0
+  %vecins813 = insertelement <4 x float> %tmp70, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext817 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add818 = fadd float %vecext817, -4.830000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins822 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins822 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins822, <4 x float>* undef, align 16
+  store <4 x float> %vecins822, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 2.700000e+01, float 0xC05F666660000000, float 0xC07D0199A0000000, float 0x407A6CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 2.700000e+01, float 0xC05F666660000000, float 0xC07D0199A0000000, float 0x407A6CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp71 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -577,17 +577,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add838 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add838, <4 x float>* undef, align 16
+  store <4 x float> %add838, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp73 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext839 = extractelement <4 x float> %tmp73, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add849 = fadd float %val, 0xC07C266660000000
+  %add849 = fadd float undef, 0xC07C266660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07D566660000000, float 0xC06D233340000000, float 0x4068B33340000000, float 0xC07ADCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07D566660000000, float 0xC06D233340000000, float 0x4068B33340000000, float 0xC07ADCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp74 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -609,9 +609,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins861 = insertelement <4 x float> %tmp77, float %add860, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins889 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins889 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins889, <4 x float>* undef, align 16
+  store <4 x float> %vecins889, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp78 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -623,9 +623,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins892 = insertelement <4 x float> %tmp79, float %add891, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins892, <4 x float>* undef, align 16
+  store <4 x float> %vecins892, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4063D33340000000, float 0xC076433340000000, float 0x407C966660000000, float 0xC07B5199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4063D33340000000, float 0xC076433340000000, float 0x407C966660000000, float 0xC07B5199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp80 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -633,7 +633,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add894 = fadd <4 x float> %tmp81, %tmp80
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add894, <4 x float>* undef, align 16
+  store <4 x float> %add894, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext895 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -659,7 +659,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins903 = insertelement <4 x float> %tmp84, float %add902, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins903, <4 x float>* undef, align 16
+  store <4 x float> %vecins903, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext904 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -669,7 +669,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins906 = insertelement <4 x float> %tmp85, float %add905, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07EFCCCC0000000, float 1.795000e+02, float 0x407E3E6660000000, float 0x4070633340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07EFCCCC0000000, float 1.795000e+02, float 0x407E3E6660000000, float 0x4070633340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp86 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -677,13 +677,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add908 = fadd <4 x float> %tmp87, %tmp86
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add908, <4 x float>* undef, align 16
+  store <4 x float> %add908, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp88 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp89 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp90 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -703,7 +703,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins917 = insertelement <4 x float> %tmp92, float %add916, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins917, <4 x float>* undef, align 16
+  store <4 x float> %vecins917, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp93 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -715,17 +715,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins920 = insertelement <4 x float> %tmp94, float %add919, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins920, <4 x float>* undef, align 16
+  store <4 x float> %vecins920, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp95 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins925 = insertelement <4 x float> %tmp95, float %val, i32 0
+  %vecins925 = insertelement <4 x float> %tmp95, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins925, <4 x float>* undef, align 16
+  store <4 x float> %vecins925, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp96 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add927 = fadd float %val, 0xC0501999A0000000
+  %add927 = fadd float undef, 0xC0501999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp97 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -739,7 +739,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins931 = insertelement <4 x float> %tmp98, float %add930, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC047B33340000000, float 0x404ACCCCC0000000, float 0x40708E6660000000, float 0x4060F999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC047B33340000000, float 0x404ACCCCC0000000, float 0x40708E6660000000, float 0x4060F999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp99 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -747,11 +747,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext937 = extractelement <4 x float> %tmp100, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add941 = fadd float %val, -4.665000e+02
+  %add941 = fadd float undef, -4.665000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins942 = insertelement <4 x float> undef, float %add941, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins942, <4 x float>* undef, align 16
+  store <4 x float> %vecins942, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp101 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -763,29 +763,29 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins945 = insertelement <4 x float> %tmp102, float %add944, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins945, <4 x float>* undef, align 16
+  store <4 x float> %vecins945, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp103 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add947 = fadd float %val, 0xC051933340000000
+  %add947 = fadd float undef, 0xC051933340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp104 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins948 = insertelement <4 x float> %tmp104, float %add947, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins948, <4 x float>* undef, align 16
+  store <4 x float> %vecins948, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4060CCCCC0000000, float 0xC07BAB3340000000, float 0xC061233340000000, float 0xC076C199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4060CCCCC0000000, float 0xC07BAB3340000000, float 0xC061233340000000, float 0xC076C199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp105 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add955 = fadd float %val, 0x4077F4CCC0000000
+  %add955 = fadd float undef, 0x4077F4CCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp106 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins956 = insertelement <4 x float> %tmp106, float %add955, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins956, <4 x float>* undef, align 16
+  store <4 x float> %vecins956, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext971 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -795,17 +795,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins973 = insertelement <4 x float> %tmp107, float %add972, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins973, <4 x float>* undef, align 16
+  store <4 x float> %vecins973, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp108 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext974 = extractelement <4 x float> %tmp108, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins976 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins976 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins976, <4 x float>* undef, align 16
+  store <4 x float> %vecins976, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407E266660000000, float -1.225000e+02, float 0x407EB199A0000000, float 0x407BA199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x407E266660000000, float -1.225000e+02, float 0x407EB199A0000000, float 0x407BA199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp109 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -817,7 +817,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp112 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext982 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -825,7 +825,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins984 = insertelement <4 x float> undef, float %add983, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins984, <4 x float>* undef, align 16
+  store <4 x float> %vecins984, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp113 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -837,25 +837,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins987 = insertelement <4 x float> %tmp114, float %add986, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins987, <4 x float>* undef, align 16
+  store <4 x float> %vecins987, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp115 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp116 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins995 = insertelement <4 x float> %tmp116, float %val, i32 0
+  %vecins995 = insertelement <4 x float> %tmp116, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins995, <4 x float>* undef, align 16
+  store <4 x float> %vecins995, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp117 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add997 = fadd float %val, 0xC0798999A0000000
+  %add997 = fadd float undef, 0xC0798999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp118 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins998 = insertelement <4 x float> %tmp118, float %add997, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins998, <4 x float>* undef, align 16
+  store <4 x float> %vecins998, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp119 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -865,7 +865,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp120 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp121 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -879,13 +879,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1031 = fadd float %vecext1030, 2.010000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp123 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp124 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1085 = insertelement <4 x float> %tmp124, float %val, i32 2
+  %vecins1085 = insertelement <4 x float> %tmp124, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp125 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -897,13 +897,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1088 = insertelement <4 x float> %tmp126, float %add1087, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1088, <4 x float>* undef, align 16
+  store <4 x float> %vecins1088, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp127 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1090 = fadd <4 x float> undef, %tmp127
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp128 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -915,7 +915,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1096 = insertelement <4 x float> %tmp129, float %add1095, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1096, <4 x float>* undef, align 16
+  store <4 x float> %vecins1096, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp130 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -927,7 +927,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1099 = insertelement <4 x float> %tmp131, float %add1098, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1099, <4 x float>* undef, align 16
+  store <4 x float> %vecins1099, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp132 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -939,9 +939,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1102 = insertelement <4 x float> %tmp133, float %add1101, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1102, <4 x float>* undef, align 16
+  store <4 x float> %vecins1102, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4059866660000000, float 0x4072466660000000, float 0xC078FE6660000000, float 0xC058ACCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4059866660000000, float 0x4072466660000000, float 0xC078FE6660000000, float 0xC058ACCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp134 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -961,9 +961,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp137 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1110 = insertelement <4 x float> %tmp137, float %val, i32 1
+  %vecins1110 = insertelement <4 x float> %tmp137, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1110, <4 x float>* undef, align 16
+  store <4 x float> %vecins1110, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp138 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -975,21 +975,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1113 = insertelement <4 x float> %tmp139, float %add1112, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1113, <4 x float>* undef, align 16
+  store <4 x float> %vecins1113, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1115 = fadd float %val, 0x4072B33340000000
+  %add1115 = fadd float undef, 0x4072B33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1116 = insertelement <4 x float> undef, float %add1115, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1116, <4 x float>* undef, align 16
+  store <4 x float> %vecins1116, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0721999A0000000, float 0x4075633340000000, float 0x40794199A0000000, float 0x4061066660000000>, <4 x float>* undef
+  store <4 x float> <float 0xC0721999A0000000, float 0x4075633340000000, float 0x40794199A0000000, float 0x4061066660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp140 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1118 = fadd <4 x float> %tmp140, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1118, <4 x float>* undef, align 16
+  store <4 x float> %add1118, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp141 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -999,7 +999,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1121 = insertelement <4 x float> undef, float %add1120, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1121, <4 x float>* undef, align 16
+  store <4 x float> %vecins1121, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp142 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1013,9 +1013,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1125 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1127 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins1127 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1127, <4 x float>* undef, align 16
+  store <4 x float> %vecins1127, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp144 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1027,7 +1027,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1130 = insertelement <4 x float> %tmp145, float %add1129, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC06D6CCCC0000000, float 0xC032E66660000000, float -1.005000e+02, float 0x40765B3340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC06D6CCCC0000000, float 0xC032E66660000000, float -1.005000e+02, float 0x40765B3340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp146 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1045,7 +1045,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1135 = insertelement <4 x float> %tmp149, float %add1134, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1135, <4 x float>* undef, align 16
+  store <4 x float> %vecins1135, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp150 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1053,13 +1053,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp151 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1138 = insertelement <4 x float> %tmp151, float %val, i32 1
+  %vecins1138 = insertelement <4 x float> %tmp151, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1138, <4 x float>* undef, align 16
+  store <4 x float> %vecins1138, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp152 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1140 = fadd float %val, 0x407AE999A0000000
+  %add1140 = fadd float undef, 0x407AE999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp153 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1073,7 +1073,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1144 = insertelement <4 x float> %tmp154, float %add1143, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1144, <4 x float>* undef, align 16
+  store <4 x float> %vecins1144, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp155 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1081,27 +1081,27 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1146 = fadd <4 x float> %tmp156, %tmp155
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1146, <4 x float>* undef, align 16
+  store <4 x float> %add1146, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp157 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1148 = fadd float %val, 4.145000e+02
+  %add1148 = fadd float undef, 4.145000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp158 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1158 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins1158 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1158, <4 x float>* undef, align 16
+  store <4 x float> %vecins1158, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40603999A0000000, float -9.150000e+01, float 0xC051E66660000000, float -4.825000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x40603999A0000000, float -9.150000e+01, float 0xC051E66660000000, float -4.825000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1218 = fadd float %val, 0xC078733340000000
+  %add1218 = fadd float undef, 0xC078733340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1219 = insertelement <4 x float> undef, float %add1218, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0655CCCC0000000, float -4.900000e+01, float -4.525000e+02, float 4.205000e+02>, <4 x float>* undef
+  store <4 x float> <float 0xC0655CCCC0000000, float -4.900000e+01, float -4.525000e+02, float 4.205000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp159 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1113,7 +1113,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1281 = insertelement <4 x float> %tmp160, float %add1280, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1281, <4 x float>* undef, align 16
+  store <4 x float> %vecins1281, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp161 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1125,7 +1125,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1284 = insertelement <4 x float> %tmp162, float %add1283, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1284, <4 x float>* undef, align 16
+  store <4 x float> %vecins1284, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp163 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1133,27 +1133,27 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1286 = fadd <4 x float> %tmp164, %tmp163
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1286, <4 x float>* undef, align 16
+  store <4 x float> %add1286, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp165 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1288 = fadd float %val, 0xC0731199A0000000
+  %add1288 = fadd float undef, 0xC0731199A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp166 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp167 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1444 = extractelement <4 x float> %tmp167, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1460 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins1460 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1460, <4 x float>* undef, align 16
+  store <4 x float> %vecins1460, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp168 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1462 = fadd float %val, -1.670000e+02
+  %add1462 = fadd float undef, -1.670000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1463 = insertelement <4 x float> undef, float %add1462, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1167,9 +1167,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1466 = insertelement <4 x float> %tmp170, float %add1465, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1466, <4 x float>* undef, align 16
+  store <4 x float> %vecins1466, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 3.885000e+02, float 0x4054266660000000, float -9.500000e+01, float 8.500000e+01>, <4 x float>* undef
+  store <4 x float> <float 3.885000e+02, float 0x4054266660000000, float -9.500000e+01, float 8.500000e+01>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp171 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1177,17 +1177,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1468 = fadd <4 x float> %tmp172, %tmp171
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1468, <4 x float>* undef, align 16
+  store <4 x float> %add1468, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp173 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1470 = fadd float %val, 0x4033B33340000000
+  %add1470 = fadd float undef, 0x4033B33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp174 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1471 = insertelement <4 x float> %tmp174, float %add1470, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1471, <4 x float>* undef, align 16
+  store <4 x float> %vecins1471, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp175 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1205,9 +1205,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp178 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1477 = insertelement <4 x float> %tmp178, float %val, i32 2
+  %vecins1477 = insertelement <4 x float> %tmp178, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1477, <4 x float>* undef, align 16
+  store <4 x float> %vecins1477, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp179 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1219,15 +1219,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1480 = insertelement <4 x float> %tmp180, float %add1479, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1480, <4 x float>* undef, align 16
+  store <4 x float> %vecins1480, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC061B33340000000, float 3.290000e+02, float 0xC067766660000000, float 0x407DB33340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC061B33340000000, float 3.290000e+02, float 0xC067766660000000, float 0x407DB33340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp181 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp182 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp183 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1241,9 +1241,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1486 = extractelement <4 x float> %tmp185, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1502 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins1502 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1502, <4 x float>* undef, align 16
+  store <4 x float> %vecins1502, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1503 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1253,7 +1253,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1505 = insertelement <4 x float> %tmp186, float %add1504, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1505, <4 x float>* undef, align 16
+  store <4 x float> %vecins1505, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp187 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1265,9 +1265,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1508 = insertelement <4 x float> %tmp188, float %add1507, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1508, <4 x float>* undef, align 16
+  store <4 x float> %vecins1508, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40762B3340000000, float 0xC074566660000000, float 0xC07C74CCC0000000, float 0xC053F999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40762B3340000000, float 0xC074566660000000, float 0xC07C74CCC0000000, float 0xC053F999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp189 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1275,7 +1275,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1510 = fadd <4 x float> %tmp190, %tmp189
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1510, <4 x float>* undef, align 16
+  store <4 x float> %add1510, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp191 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1289,13 +1289,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1656 = insertelement <4 x float> %tmp193, float %add1655, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1656, <4 x float>* undef, align 16
+  store <4 x float> %vecins1656, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1658 = fadd float %val, 0x40709999A0000000
+  %add1658 = fadd float undef, 0x40709999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp194 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1660 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1305,19 +1305,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1662 = insertelement <4 x float> %tmp195, float %add1661, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1662, <4 x float>* undef, align 16
+  store <4 x float> %vecins1662, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC075266660000000, float 0xC072C4CCC0000000, float 0x407C4E6660000000, float -4.485000e+02>, <4 x float>* undef
+  store <4 x float> <float 0xC075266660000000, float 0xC072C4CCC0000000, float 0x407C4E6660000000, float -4.485000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1676 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins1676 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp196 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1692 = fadd <4 x float> %tmp196, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1692, <4 x float>* undef, align 16
+  store <4 x float> %add1692, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp197 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1329,7 +1329,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1695 = insertelement <4 x float> %tmp198, float %add1694, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1695, <4 x float>* undef, align 16
+  store <4 x float> %vecins1695, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp199 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1341,7 +1341,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1698 = insertelement <4 x float> %tmp200, float %add1697, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1698, <4 x float>* undef, align 16
+  store <4 x float> %vecins1698, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp201 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1349,15 +1349,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp202 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1701 = insertelement <4 x float> %tmp202, float %val, i32 2
+  %vecins1701 = insertelement <4 x float> %tmp202, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1701, <4 x float>* undef, align 16
+  store <4 x float> %vecins1701, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp203 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1704 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins1704 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC075933340000000, float 0xC0489999A0000000, float 0xC078AB3340000000, float 0x406DFCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC075933340000000, float 0xC0489999A0000000, float 0xC078AB3340000000, float 0x406DFCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp204 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1365,9 +1365,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp206 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1709 = insertelement <4 x float> %tmp206, float %val, i32 0
+  %vecins1709 = insertelement <4 x float> %tmp206, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1709, <4 x float>* undef, align 16
+  store <4 x float> %vecins1709, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp207 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1375,11 +1375,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1714 = fadd float %vecext1713, 0xC0703199A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1723 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins1723 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp208 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1730 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1389,9 +1389,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1732 = insertelement <4 x float> %tmp209, float %add1731, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1732, <4 x float>* undef, align 16
+  store <4 x float> %vecins1732, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40551999A0000000, float 0xC0708999A0000000, float 0xC054F33340000000, float 0xC07C5999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40551999A0000000, float 0xC0708999A0000000, float 0xC054F33340000000, float 0xC07C5999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp210 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1399,7 +1399,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp211 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1736 = fadd float %val, 0x407C3999A0000000
+  %add1736 = fadd float undef, 0x407C3999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp212 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1415,7 +1415,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1740 = insertelement <4 x float> %tmp214, float %add1739, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1740, <4 x float>* undef, align 16
+  store <4 x float> %vecins1740, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp215 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1427,25 +1427,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1743 = insertelement <4 x float> %tmp216, float %add1742, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1743, <4 x float>* undef, align 16
+  store <4 x float> %vecins1743, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1744 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp217 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1746 = insertelement <4 x float> %tmp217, float %val, i32 3
+  %vecins1746 = insertelement <4 x float> %tmp217, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC076466660000000, float 0x4060BCCCC0000000, float 0x405EF999A0000000, float 0x4074766660000000>, <4 x float>* undef
+  store <4 x float> <float 0xC076466660000000, float 0x4060BCCCC0000000, float 0x405EF999A0000000, float 0x4074766660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp218 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1748 = fadd <4 x float> undef, %tmp218
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1748, <4 x float>* undef, align 16
+  store <4 x float> %add1748, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp219 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1750 = fadd float %val, 0x407C6B3340000000
+  %add1750 = fadd float undef, 0x407C6B3340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1751 = insertelement <4 x float> undef, float %add1750, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1467,21 +1467,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp223 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1759 = fadd float %val, 0x40678999A0000000
+  %add1759 = fadd float undef, 0x40678999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp224 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1760 = insertelement <4 x float> %tmp224, float %add1759, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1760, <4 x float>* undef, align 16
+  store <4 x float> %vecins1760, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x405E333340000000, float 0x40571999A0000000, float 0xC02E333340000000, float 0x4053A66660000000>, <4 x float>* undef
+  store <4 x float> <float 0x405E333340000000, float 0x40571999A0000000, float 0xC02E333340000000, float 0x4053A66660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp225 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1762 = fadd <4 x float> undef, %tmp225
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1762, <4 x float>* undef, align 16
+  store <4 x float> %add1762, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp226 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1493,7 +1493,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1765 = insertelement <4 x float> %tmp227, float %add1764, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1765, <4 x float>* undef, align 16
+  store <4 x float> %vecins1765, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp228 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1505,7 +1505,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1768 = insertelement <4 x float> %tmp229, float %add1767, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1768, <4 x float>* undef, align 16
+  store <4 x float> %vecins1768, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1769 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1515,7 +1515,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1771 = insertelement <4 x float> %tmp230, float %add1770, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1771, <4 x float>* undef, align 16
+  store <4 x float> %vecins1771, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp231 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1525,13 +1525,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp234 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1779 = insertelement <4 x float> %tmp234, float %val, i32 0
+  %vecins1779 = insertelement <4 x float> %tmp234, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1779, <4 x float>* undef, align 16
+  store <4 x float> %vecins1779, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp235 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp236 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1541,9 +1541,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1785 = insertelement <4 x float> undef, float %add1784, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1785, <4 x float>* undef, align 16
+  store <4 x float> %vecins1785, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07074CCC0000000, float 0xC04D666660000000, float 3.235000e+02, float 0xC0724199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07074CCC0000000, float 0xC04D666660000000, float 3.235000e+02, float 0xC0724199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp237 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1559,25 +1559,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1793 = insertelement <4 x float> %tmp239, float %add1792, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1793, <4 x float>* undef, align 16
+  store <4 x float> %vecins1793, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp240 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1795 = fadd float %val, 0x4055266660000000
+  %add1795 = fadd float undef, 0x4055266660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp241 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1796 = insertelement <4 x float> %tmp241, float %add1795, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1799 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins1799 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1800 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp242 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -6.600000e+01, float 0xC07B2199A0000000, float 0x4011333340000000, float 0xC0635CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float -6.600000e+01, float 0xC07B2199A0000000, float 0x4011333340000000, float 0xC0635CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp243 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1587,7 +1587,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp246 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1865 = fadd float %val, -2.235000e+02
+  %add1865 = fadd float undef, -2.235000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp247 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1597,33 +1597,33 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp249 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1872 = insertelement <4 x float> %tmp249, float %val, i32 3
+  %vecins1872 = insertelement <4 x float> %tmp249, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406B8999A0000000, float 0xC0696CCCC0000000, float 0xC07A34CCC0000000, float 0x407654CCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x406B8999A0000000, float 0xC0696CCCC0000000, float 0xC07A34CCC0000000, float 0x407654CCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp250 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1874 = fadd <4 x float> %tmp250, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1874, <4 x float>* undef, align 16
+  store <4 x float> %add1874, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1875 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp251 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1894 = insertelement <4 x float> %tmp251, float %val, i32 1
+  %vecins1894 = insertelement <4 x float> %tmp251, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp252 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1895 = extractelement <4 x float> %tmp252, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1900 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins1900 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1900, <4 x float>* undef, align 16
+  store <4 x float> %vecins1900, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1905 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins1905 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1905, <4 x float>* undef, align 16
+  store <4 x float> %vecins1905, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp253 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1633,7 +1633,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1908 = insertelement <4 x float> undef, float %add1907, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1908, <4 x float>* undef, align 16
+  store <4 x float> %vecins1908, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1909 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1649,23 +1649,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1916 = fadd <4 x float> %tmp256, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add1916, <4 x float>* undef, align 16
+  store <4 x float> %add1916, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1923 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp257 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add1927 = fadd float %val, 0x40761999A0000000
+  %add1927 = fadd float undef, 0x40761999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp258 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1928 = insertelement <4 x float> %tmp258, float %add1927, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1928, <4 x float>* undef, align 16
+  store <4 x float> %vecins1928, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 7.100000e+01, float 0xC0634999A0000000, float 0x407B0B3340000000, float 0xC07DE999A0000000>, <4 x float>* undef
+  store <4 x float> <float 7.100000e+01, float 0xC0634999A0000000, float 0x407B0B3340000000, float 0xC07DE999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp259 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1677,9 +1677,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp262 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1933 = insertelement <4 x float> %tmp262, float %val, i32 0
+  %vecins1933 = insertelement <4 x float> %tmp262, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1933, <4 x float>* undef, align 16
+  store <4 x float> %vecins1933, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp263 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1693,15 +1693,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1940 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1942 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins1942 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -8.200000e+01, float 0xC04C733340000000, float 0xC077ACCCC0000000, float 0x4074566660000000>, <4 x float>* undef
+  store <4 x float> <float -8.200000e+01, float 0xC04C733340000000, float 0xC077ACCCC0000000, float 0x4074566660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp265 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp266 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp267 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1709,13 +1709,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add1946 = fadd float %vecext1945, 0xC074866660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1953 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins1953 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1953, <4 x float>* undef, align 16
+  store <4 x float> %vecins1953, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp268 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp269 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1737,15 +1737,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1964 = insertelement <4 x float> %tmp272, float %add1963, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1964, <4 x float>* undef, align 16
+  store <4 x float> %vecins1964, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1965 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp273 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1967 = insertelement <4 x float> %tmp273, float %val, i32 2
+  %vecins1967 = insertelement <4 x float> %tmp273, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1967, <4 x float>* undef, align 16
+  store <4 x float> %vecins1967, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp274 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1757,9 +1757,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1970 = insertelement <4 x float> %tmp275, float %add1969, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1970, <4 x float>* undef, align 16
+  store <4 x float> %vecins1970, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x402E9999A0000000, float 0x407344CCC0000000, float -4.165000e+02, float 0x4078FCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x402E9999A0000000, float 0x407344CCC0000000, float -4.165000e+02, float 0x4078FCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp276 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1767,31 +1767,31 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp278 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1975 = insertelement <4 x float> %tmp278, float %val, i32 0
+  %vecins1975 = insertelement <4 x float> %tmp278, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1975, <4 x float>* undef, align 16
+  store <4 x float> %vecins1975, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp279 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1976 = extractelement <4 x float> %tmp279, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1978 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins1978 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1978, <4 x float>* undef, align 16
+  store <4 x float> %vecins1978, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1979 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1981 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins1981 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1981, <4 x float>* undef, align 16
+  store <4 x float> %vecins1981, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins1984 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins1984 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1984, <4 x float>* undef, align 16
+  store <4 x float> %vecins1984, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC06A766660000000, float 0xC07CE4CCC0000000, float -1.055000e+02, float 0x40786E6660000000>, <4 x float>* undef
+  store <4 x float> <float 0xC06A766660000000, float 0xC07CE4CCC0000000, float -1.055000e+02, float 0x40786E6660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext1990 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1803,11 +1803,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins1998 = insertelement <4 x float> %tmp280, float %add1997, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins1998, <4 x float>* undef, align 16
+  store <4 x float> %vecins1998, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0794E6660000000, float 0xC073CCCCC0000000, float 0x407994CCC0000000, float 6.500000e+01>, <4 x float>* undef
+  store <4 x float> <float 0xC0794E6660000000, float 0xC073CCCCC0000000, float 0x407994CCC0000000, float 6.500000e+01>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2004 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1817,7 +1817,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2006 = insertelement <4 x float> %tmp281, float %add2005, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2006, <4 x float>* undef, align 16
+  store <4 x float> %vecins2006, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp282 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1825,7 +1825,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp283 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2009 = insertelement <4 x float> %tmp283, float %val, i32 2
+  %vecins2009 = insertelement <4 x float> %tmp283, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp284 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1837,15 +1837,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2012 = insertelement <4 x float> %tmp285, float %add2011, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2012, <4 x float>* undef, align 16
+  store <4 x float> %vecins2012, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC04E733340000000, float 0xC074566660000000, float 0x4079F66660000000, float 0xC0705B3340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC04E733340000000, float 0xC074566660000000, float 0x4079F66660000000, float 0xC0705B3340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp286 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp287 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp288 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1857,7 +1857,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2017 = insertelement <4 x float> %tmp289, float %add2016, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add2022 = fadd float %val, 8.350000e+01
+  %add2022 = fadd float undef, 8.350000e+01
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp290 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1871,7 +1871,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2028 = fadd <4 x float> %tmp292, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add2028, <4 x float>* undef, align 16
+  store <4 x float> %add2028, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2029 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1879,11 +1879,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp293 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp294 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add2036 = fadd float %val, 0x407DE66660000000
+  %add2036 = fadd float undef, 0x407DE66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp295 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1895,9 +1895,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp299 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2045 = insertelement <4 x float> %tmp299, float %val, i32 0
+  %vecins2045 = insertelement <4 x float> %tmp299, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2045, <4 x float>* undef, align 16
+  store <4 x float> %vecins2045, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp300 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1905,35 +1905,35 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2047 = fadd float %vecext2046, 0xC065433340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2052 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp301 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2054 = insertelement <4 x float> %tmp301, float %val, i32 3
+  %vecins2054 = insertelement <4 x float> %tmp301, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2054, <4 x float>* undef, align 16
+  store <4 x float> %vecins2054, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4024666660000000, float 0x4079366660000000, float 0x40721B3340000000, float 0x406E533340000000>, <4 x float>* undef
+  store <4 x float> <float 0x4024666660000000, float 0x4079366660000000, float 0x40721B3340000000, float 0x406E533340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp302 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2056 = fadd <4 x float> undef, %tmp302
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add2056, <4 x float>* undef, align 16
+  store <4 x float> %add2056, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp303 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp304 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2062 = insertelement <4 x float> %tmp304, float %val, i32 1
+  %vecins2062 = insertelement <4 x float> %tmp304, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2062, <4 x float>* undef, align 16
+  store <4 x float> %vecins2062, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp305 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp306 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1943,9 +1943,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2068 = insertelement <4 x float> undef, float %add2067, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2068, <4 x float>* undef, align 16
+  store <4 x float> %vecins2068, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07EFCCCC0000000, float -3.420000e+02, float 0xC07BC999A0000000, float 0x40751999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07EFCCCC0000000, float -3.420000e+02, float 0xC07BC999A0000000, float 0x40751999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp307 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1953,7 +1953,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2070 = fadd <4 x float> %tmp308, %tmp307
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add2070, <4 x float>* undef, align 16
+  store <4 x float> %add2070, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp309 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1965,7 +1965,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2073 = insertelement <4 x float> %tmp310, float %add2072, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2073, <4 x float>* undef, align 16
+  store <4 x float> %vecins2073, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp311 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1973,7 +1973,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp312 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2076 = insertelement <4 x float> %tmp312, float %val, i32 1
+  %vecins2076 = insertelement <4 x float> %tmp312, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp313 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1985,7 +1985,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2079 = insertelement <4 x float> %tmp314, float %add2078, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2079, <4 x float>* undef, align 16
+  store <4 x float> %vecins2079, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp315 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -1997,15 +1997,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2082 = insertelement <4 x float> %tmp316, float %add2081, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2082, <4 x float>* undef, align 16
+  store <4 x float> %vecins2082, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40764E6660000000, float 0x40501999A0000000, float 0xC079A4CCC0000000, float 0x4050533340000000>, <4 x float>* undef
+  store <4 x float> <float 0x40764E6660000000, float 0x40501999A0000000, float 0xC079A4CCC0000000, float 0x4050533340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp317 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp318 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp319 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2015,7 +2015,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2087 = insertelement <4 x float> undef, float %add2086, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2087, <4 x float>* undef, align 16
+  store <4 x float> %vecins2087, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2480 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2029,23 +2029,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2485 = insertelement <4 x float> %tmp320, float %add2484, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2485, <4 x float>* undef, align 16
+  store <4 x float> %vecins2485, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp321 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add2487 = fadd float %val, 2.030000e+02
+  %add2487 = fadd float undef, 2.030000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp322 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4073DE6660000000, float 0x4067CCCCC0000000, float 0xC03F1999A0000000, float 4.350000e+01>, <4 x float>* undef
+  store <4 x float> <float 0x4073DE6660000000, float 0x4067CCCCC0000000, float 0xC03F1999A0000000, float 4.350000e+01>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2491 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp323 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp324 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2055,9 +2055,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp325 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2499 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins2499 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2499, <4 x float>* undef, align 16
+  store <4 x float> %vecins2499, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2500 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2079,7 +2079,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp329 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add2534 = fadd float %val, 0x4072C66660000000
+  %add2534 = fadd float undef, 0x4072C66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2536 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2089,15 +2089,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2538 = insertelement <4 x float> %tmp330, float %add2537, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2538, <4 x float>* undef, align 16
+  store <4 x float> %vecins2538, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2539 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2540 = fadd float %vecext2539, 0x406F9999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2580 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins2580 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2580, <4 x float>* undef, align 16
+  store <4 x float> %vecins2580, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp331 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2107,7 +2107,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2583 = insertelement <4 x float> undef, float %add2582, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2583, <4 x float>* undef, align 16
+  store <4 x float> %vecins2583, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2584 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2115,21 +2115,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp332 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40773199A0000000, float 0x407D7999A0000000, float 0xC0717199A0000000, float 0xC07E9CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40773199A0000000, float 0x407D7999A0000000, float 0xC0717199A0000000, float 0xC07E9CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add2590 = fadd float %val, 0x407B1999A0000000
+  %add2590 = fadd float undef, 0x407B1999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp333 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp334 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2672 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add2672, <4 x float>* undef, align 16
+  store <4 x float> %add2672, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp335 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2141,37 +2141,37 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2678 = insertelement <4 x float> %tmp336, float %add2677, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2678, <4 x float>* undef, align 16
+  store <4 x float> %vecins2678, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp337 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2679 = extractelement <4 x float> %tmp337, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2681 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins2681 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2681, <4 x float>* undef, align 16
+  store <4 x float> %vecins2681, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp338 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext2682 = extractelement <4 x float> %tmp338, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2684 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins2684 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp339 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp340 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp341 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add2688 = fadd float %val, 0x4063266660000000
+  %add2688 = fadd float undef, 0x4063266660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins2692 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins2692 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2692, <4 x float>* undef, align 16
+  store <4 x float> %vecins2692, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp342 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2183,9 +2183,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins2698 = insertelement <4 x float> %tmp343, float %add2697, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins2698, <4 x float>* undef, align 16
+  store <4 x float> %vecins2698, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40547999A0000000, float 0xC060633340000000, float 0x4075766660000000, float 0x4072D33340000000>, <4 x float>* undef
+  store <4 x float> <float 0x40547999A0000000, float 0xC060633340000000, float 0x4075766660000000, float 0x4072D33340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp344 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2193,7 +2193,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add2700 = fadd <4 x float> %tmp345, %tmp344
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add2700, <4 x float>* undef, align 16
+  store <4 x float> %add2700, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp346 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2207,15 +2207,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp349 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3121 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3125 = fadd float %val, 0xC06F266660000000
+  %add3125 = fadd float undef, 0xC06F266660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3126 = insertelement <4 x float> undef, float %add3125, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3126, <4 x float>* undef, align 16
+  store <4 x float> %vecins3126, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp350 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2227,11 +2227,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3129 = insertelement <4 x float> %tmp351, float %add3128, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3129, <4 x float>* undef, align 16
+  store <4 x float> %vecins3129, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp352 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3131 = fadd float %val, 3.215000e+02
+  %add3131 = fadd float undef, 3.215000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp353 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2239,15 +2239,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3134 = fadd <4 x float> %tmp354, %tmp353
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3134, <4 x float>* undef, align 16
+  store <4 x float> %add3134, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp355 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3136 = fadd float %val, 0x4074333340000000
+  %add3136 = fadd float undef, 0x4074333340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3140 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins3140 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3140, <4 x float>* undef, align 16
+  store <4 x float> %vecins3140, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp356 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2259,7 +2259,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3143 = insertelement <4 x float> %tmp357, float %add3142, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3143, <4 x float>* undef, align 16
+  store <4 x float> %vecins3143, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp358 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2271,15 +2271,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3146 = insertelement <4 x float> %tmp359, float %add3145, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3146, <4 x float>* undef, align 16
+  store <4 x float> %vecins3146, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp360 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3272 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins3272 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3272, <4 x float>* undef, align 16
+  store <4 x float> %vecins3272, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407B4999A0000000, float 0x40695CCCC0000000, float 0xC05C0CCCC0000000, float 0x407EB33340000000>, <4 x float>* undef
+  store <4 x float> <float 0x407B4999A0000000, float 0x40695CCCC0000000, float 0xC05C0CCCC0000000, float 0x407EB33340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp361 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2287,7 +2287,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3274 = fadd <4 x float> %tmp362, %tmp361
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3274, <4 x float>* undef, align 16
+  store <4 x float> %add3274, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp363 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2299,7 +2299,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3277 = insertelement <4 x float> %tmp364, float %add3276, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3277, <4 x float>* undef, align 16
+  store <4 x float> %vecins3277, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp365 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2309,7 +2309,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3280 = insertelement <4 x float> undef, float %add3279, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3280, <4 x float>* undef, align 16
+  store <4 x float> %vecins3280, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp366 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2321,7 +2321,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3283 = insertelement <4 x float> %tmp367, float %add3282, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3283, <4 x float>* undef, align 16
+  store <4 x float> %vecins3283, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp368 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2333,7 +2333,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp369 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp370 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2345,7 +2345,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3291 = insertelement <4 x float> %tmp371, float %add3290, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3291, <4 x float>* undef, align 16
+  store <4 x float> %vecins3291, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3292 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2353,11 +2353,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp373 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3328 = insertelement <4 x float> %tmp373, float %val, i32 3
+  %vecins3328 = insertelement <4 x float> %tmp373, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3330 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3330, <4 x float>* undef, align 16
+  store <4 x float> %add3330, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3331 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2367,7 +2367,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3333 = insertelement <4 x float> %tmp374, float %add3332, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3333, <4 x float>* undef, align 16
+  store <4 x float> %vecins3333, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3334 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2385,7 +2385,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3339 = insertelement <4 x float> %tmp376, float %add3338, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3339, <4 x float>* undef, align 16
+  store <4 x float> %vecins3339, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp377 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2393,13 +2393,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp378 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3342 = insertelement <4 x float> %tmp378, float %val, i32 3
+  %vecins3342 = insertelement <4 x float> %tmp378, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp379 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3344 = fadd <4 x float> %tmp379, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3344, <4 x float>* undef, align 16
+  store <4 x float> %add3344, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp380 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2419,15 +2419,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3350 = insertelement <4 x float> %tmp382, float %add3349, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3350, <4 x float>* undef, align 16
+  store <4 x float> %vecins3350, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3352 = fadd float %val, 0xC06ACCCCC0000000
+  %add3352 = fadd float undef, 0xC06ACCCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp383 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3423 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins3423 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3423, <4 x float>* undef, align 16
+  store <4 x float> %vecins3423, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3424 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2437,9 +2437,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3426 = insertelement <4 x float> %tmp384, float %add3425, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3426, <4 x float>* undef, align 16
+  store <4 x float> %vecins3426, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 2.795000e+02, float -4.065000e+02, float 0xC05CD999A0000000, float 1.825000e+02>, <4 x float>* undef
+  store <4 x float> <float 2.795000e+02, float -4.065000e+02, float 0xC05CD999A0000000, float 1.825000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp385 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2457,7 +2457,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3431 = insertelement <4 x float> %tmp388, float %add3430, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3431, <4 x float>* undef, align 16
+  store <4 x float> %vecins3431, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp389 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2469,15 +2469,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3434 = insertelement <4 x float> %tmp390, float %add3433, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3434, <4 x float>* undef, align 16
+  store <4 x float> %vecins3434, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3435 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp391 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3437 = insertelement <4 x float> %tmp391, float %val, i32 2
+  %vecins3437 = insertelement <4 x float> %tmp391, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3437, <4 x float>* undef, align 16
+  store <4 x float> %vecins3437, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp392 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2485,7 +2485,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3439 = fadd float %vecext3438, 0xC071D999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0798199A0000000, float -3.385000e+02, float 0xC050066660000000, float 0xC075E999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC0798199A0000000, float -3.385000e+02, float 0xC050066660000000, float 0xC075E999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp393 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2493,7 +2493,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3442 = fadd <4 x float> %tmp394, %tmp393
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3442, <4 x float>* undef, align 16
+  store <4 x float> %add3442, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3443 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2509,7 +2509,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3448 = insertelement <4 x float> %tmp396, float %add3447, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3448, <4 x float>* undef, align 16
+  store <4 x float> %vecins3448, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp397 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2521,15 +2521,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3451 = insertelement <4 x float> %tmp398, float %add3450, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3451, <4 x float>* undef, align 16
+  store <4 x float> %vecins3451, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3453 = fadd float %val, 0xC07ADCCCC0000000
+  %add3453 = fadd float undef, 0xC07ADCCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp399 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3454 = insertelement <4 x float> %tmp399, float %add3453, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3454, <4 x float>* undef, align 16
+  store <4 x float> %vecins3454, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp400 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2539,7 +2539,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3459 = insertelement <4 x float> undef, float %add3458, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3459, <4 x float>* undef, align 16
+  store <4 x float> %vecins3459, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp401 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2547,19 +2547,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp402 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3462 = insertelement <4 x float> %tmp402, float %val, i32 1
+  %vecins3462 = insertelement <4 x float> %tmp402, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3462, <4 x float>* undef, align 16
+  store <4 x float> %vecins3462, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp403 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3464 = fadd float %val, 0xC057B999A0000000
+  %add3464 = fadd float undef, 0xC057B999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp404 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3465 = insertelement <4 x float> %tmp404, float %add3464, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3465, <4 x float>* undef, align 16
+  store <4 x float> %vecins3465, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp405 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2569,21 +2569,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp406 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x405C3999A0000000, float 0xC07C6B3340000000, float 0x407ACB3340000000, float 0xC06E0999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x405C3999A0000000, float 0xC07C6B3340000000, float 0x407ACB3340000000, float 0xC06E0999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp407 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp408 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3477 = extractelement <4 x float> %tmp408, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins3479 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins3479 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3479, <4 x float>* undef, align 16
+  store <4 x float> %vecins3479, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3480 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2593,23 +2593,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3482 = insertelement <4 x float> %tmp409, float %add3481, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3482, <4 x float>* undef, align 16
+  store <4 x float> %vecins3482, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 3.565000e+02, float 0xC0464CCCC0000000, float 0x4037666660000000, float 0xC0788CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 3.565000e+02, float 0xC0464CCCC0000000, float 0x4037666660000000, float 0xC0788CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp410 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3484 = fadd <4 x float> %tmp410, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3484, <4 x float>* undef, align 16
+  store <4 x float> %add3484, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp411 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3486 = fadd float %val, -1.415000e+02
+  %add3486 = fadd float undef, -1.415000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3487 = insertelement <4 x float> undef, float %add3486, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3487, <4 x float>* undef, align 16
+  store <4 x float> %vecins3487, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp412 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2621,25 +2621,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3490 = insertelement <4 x float> %tmp413, float %add3489, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3490, <4 x float>* undef, align 16
+  store <4 x float> %vecins3490, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3492 = fadd float %val, 0x4078066660000000
+  %add3492 = fadd float undef, 0x4078066660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp414 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3493 = insertelement <4 x float> %tmp414, float %add3492, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3493, <4 x float>* undef, align 16
+  store <4 x float> %vecins3493, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp415 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3495 = fadd float %val, 0xC0798999A0000000
+  %add3495 = fadd float undef, 0xC0798999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp416 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3496 = insertelement <4 x float> %tmp416, float %add3495, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3496, <4 x float>* undef, align 16
+  store <4 x float> %vecins3496, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp417 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2647,7 +2647,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add3498 = fadd <4 x float> %tmp418, %tmp417
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add3498, <4 x float>* undef, align 16
+  store <4 x float> %add3498, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3499 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2663,25 +2663,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp420 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3506 = fadd float %val, 0xC074DB3340000000
+  %add3506 = fadd float undef, 0xC074DB3340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp421 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins3507 = insertelement <4 x float> %tmp421, float %add3506, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins3507, <4 x float>* undef, align 16
+  store <4 x float> %vecins3507, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add3509 = fadd float %val, 0xC066033340000000
+  %add3509 = fadd float undef, 0xC066033340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp422 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x404B333340000000, float 4.680000e+02, float 0x40577999A0000000, float 0xC07D9999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x404B333340000000, float 4.680000e+02, float 0x40577999A0000000, float 0xC07D9999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp423 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3513 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2693,9 +2693,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext3516 = extractelement <4 x float> %tmp425, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5414 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins5414 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5414, <4 x float>* undef, align 16
+  store <4 x float> %vecins5414, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp426 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2703,33 +2703,33 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5416 = fadd <4 x float> %tmp427, %tmp426
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5416, <4 x float>* undef, align 16
+  store <4 x float> %add5416, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp428 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5418 = fadd float %val, 0xC07ED999A0000000
+  %add5418 = fadd float undef, 0xC07ED999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp429 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5419 = insertelement <4 x float> %tmp429, float %add5418, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5624 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins5624 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5624, <4 x float>* undef, align 16
+  store <4 x float> %vecins5624, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07B4999A0000000, float 0x4078B33340000000, float 0xC07674CCC0000000, float 0xC07C533340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07B4999A0000000, float 0x4078B33340000000, float 0xC07674CCC0000000, float 0xC07C533340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5626 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5626, <4 x float>* undef, align 16
+  store <4 x float> %add5626, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext5627 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp430 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5629 = insertelement <4 x float> %tmp430, float %val, i32 0
+  %vecins5629 = insertelement <4 x float> %tmp430, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5629, <4 x float>* undef, align 16
+  store <4 x float> %vecins5629, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp431 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2739,13 +2739,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5632 = insertelement <4 x float> undef, float %add5631, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5632, <4 x float>* undef, align 16
+  store <4 x float> %vecins5632, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp432 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5688 = insertelement <4 x float> %tmp432, float %val, i32 1
+  %vecins5688 = insertelement <4 x float> %tmp432, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5688, <4 x float>* undef, align 16
+  store <4 x float> %vecins5688, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp433 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2753,35 +2753,35 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp434 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5691 = insertelement <4 x float> %tmp434, float %val, i32 2
+  %vecins5691 = insertelement <4 x float> %tmp434, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5691, <4 x float>* undef, align 16
+  store <4 x float> %vecins5691, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext5692 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -4.350000e+02, float 0xC0775CCCC0000000, float 0xC0714999A0000000, float 0xC0661999A0000000>, <4 x float>* undef
+  store <4 x float> <float -4.350000e+02, float 0xC0775CCCC0000000, float 0xC0714999A0000000, float 0xC0661999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp435 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5696 = fadd <4 x float> undef, %tmp435
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5696, <4 x float>* undef, align 16
+  store <4 x float> %add5696, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5701 = fadd float %val, 0x4077D4CCC0000000
+  %add5701 = fadd float undef, 0x4077D4CCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp436 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5702 = insertelement <4 x float> %tmp436, float %add5701, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5702, <4 x float>* undef, align 16
+  store <4 x float> %vecins5702, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp437 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp438 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5705 = insertelement <4 x float> %tmp438, float %val, i32 2
+  %vecins5705 = insertelement <4 x float> %tmp438, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5705, <4 x float>* undef, align 16
+  store <4 x float> %vecins5705, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp439 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2793,9 +2793,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5708 = insertelement <4 x float> %tmp440, float %add5707, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5708, <4 x float>* undef, align 16
+  store <4 x float> %vecins5708, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x405D666660000000, float 0xC069333340000000, float 0x407B6B3340000000, float 0xC06EB33340000000>, <4 x float>* undef
+  store <4 x float> <float 0x405D666660000000, float 0xC069333340000000, float 0x407B6B3340000000, float 0xC06EB33340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp441 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2803,7 +2803,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5710 = fadd <4 x float> %tmp442, %tmp441
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5710, <4 x float>* undef, align 16
+  store <4 x float> %add5710, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp443 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2815,19 +2815,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5713 = insertelement <4 x float> %tmp444, float %add5712, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5713, <4 x float>* undef, align 16
+  store <4 x float> %vecins5713, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp445 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp446 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5716 = insertelement <4 x float> %tmp446, float %val, i32 1
+  %vecins5716 = insertelement <4 x float> %tmp446, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp447 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5724 = fadd <4 x float> %tmp447, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5724, <4 x float>* undef, align 16
+  store <4 x float> %add5724, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp448 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2835,21 +2835,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp449 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5750 = insertelement <4 x float> %tmp449, float %val, i32 3
+  %vecins5750 = insertelement <4 x float> %tmp449, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40692999A0000000, float 0xC07C4CCCC0000000, float 0x407D1E6660000000, float 0x407B4199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40692999A0000000, float 0xC07C4CCCC0000000, float 0x407D1E6660000000, float 0x407B4199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp450 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5752 = fadd <4 x float> undef, %tmp450
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5754 = fadd float %val, 0xC064033340000000
+  %add5754 = fadd float undef, 0xC064033340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp451 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5755 = insertelement <4 x float> %tmp451, float %add5754, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5755, <4 x float>* undef, align 16
+  store <4 x float> %vecins5755, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp452 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2861,7 +2861,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5758 = insertelement <4 x float> %tmp453, float %add5757, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5758, <4 x float>* undef, align 16
+  store <4 x float> %vecins5758, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp454 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2869,9 +2869,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp455 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5761 = insertelement <4 x float> %tmp455, float %val, i32 2
+  %vecins5761 = insertelement <4 x float> %tmp455, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5761, <4 x float>* undef, align 16
+  store <4 x float> %vecins5761, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp456 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2883,13 +2883,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5764 = insertelement <4 x float> %tmp457, float %add5763, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5764, <4 x float>* undef, align 16
+  store <4 x float> %vecins5764, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407A6B3340000000, float 0x40470CCCC0000000, float 0xC076F4CCC0000000, float 0x40791999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x407A6B3340000000, float 0x40470CCCC0000000, float 0xC076F4CCC0000000, float 0x40791999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5766 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5766, <4 x float>* undef, align 16
+  store <4 x float> %add5766, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp458 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2901,9 +2901,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5769 = insertelement <4 x float> %tmp459, float %add5768, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5769, <4 x float>* undef, align 16
+  store <4 x float> %vecins5769, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5771 = fadd float %val, 8.000000e+00
+  %add5771 = fadd float undef, 8.000000e+00
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp460 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2911,11 +2911,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp461 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5796 = fadd float %val, 0x4058ECCCC0000000
+  %add5796 = fadd float undef, 0x4058ECCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5797 = insertelement <4 x float> undef, float %add5796, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5797, <4 x float>* undef, align 16
+  store <4 x float> %vecins5797, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp462 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2923,7 +2923,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp463 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5800 = insertelement <4 x float> %tmp463, float %val, i32 1
+  %vecins5800 = insertelement <4 x float> %tmp463, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp464 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2935,7 +2935,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5803 = insertelement <4 x float> %tmp465, float %add5802, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5803, <4 x float>* undef, align 16
+  store <4 x float> %vecins5803, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp466 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2947,11 +2947,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5806 = insertelement <4 x float> %tmp467, float %add5805, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5806, <4 x float>* undef, align 16
+  store <4 x float> %vecins5806, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp468 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp469 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2961,7 +2961,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp470 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp471 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2973,9 +2973,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5820 = insertelement <4 x float> %tmp472, float %add5819, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5820, <4 x float>* undef, align 16
+  store <4 x float> %vecins5820, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40514CCCC0000000, float 0x406A7999A0000000, float 0xC078766660000000, float 0xC0522CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40514CCCC0000000, float 0x406A7999A0000000, float 0xC078766660000000, float 0xC0522CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp473 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2983,7 +2983,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5822 = fadd <4 x float> %tmp474, %tmp473
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5822, <4 x float>* undef, align 16
+  store <4 x float> %add5822, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp475 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -2991,7 +2991,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp476 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5825 = insertelement <4 x float> %tmp476, float %val, i32 0
+  %vecins5825 = insertelement <4 x float> %tmp476, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp477 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3003,7 +3003,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5828 = insertelement <4 x float> %tmp478, float %add5827, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5828, <4 x float>* undef, align 16
+  store <4 x float> %vecins5828, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp479 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3015,19 +3015,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5831 = insertelement <4 x float> %tmp480, float %add5830, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -3.370000e+02, float 0xC072DE6660000000, float -2.670000e+02, float 0x4062333340000000>, <4 x float>* undef
+  store <4 x float> <float -3.370000e+02, float 0xC072DE6660000000, float -2.670000e+02, float 0x4062333340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp481 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext5837 = extractelement <4 x float> %tmp481, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5839 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins5839 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5839, <4 x float>* undef, align 16
+  store <4 x float> %vecins5839, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp482 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3035,33 +3035,33 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp483 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5842 = insertelement <4 x float> %tmp483, float %val, i32 1
+  %vecins5842 = insertelement <4 x float> %tmp483, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5842, <4 x float>* undef, align 16
+  store <4 x float> %vecins5842, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp484 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp485 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5845 = insertelement <4 x float> %tmp485, float %val, i32 2
+  %vecins5845 = insertelement <4 x float> %tmp485, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5845, <4 x float>* undef, align 16
+  store <4 x float> %vecins5845, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC06EC999A0000000, float 0x406D5999A0000000, float 0x4056F33340000000, float 0xC07E14CCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC06EC999A0000000, float 0x406D5999A0000000, float 0x4056F33340000000, float 0xC07E14CCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5850 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5850, <4 x float>* undef, align 16
+  store <4 x float> %add5850, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp486 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5852 = fadd float %val, 2.985000e+02
+  %add5852 = fadd float undef, 2.985000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp487 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5853 = insertelement <4 x float> %tmp487, float %add5852, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5853, <4 x float>* undef, align 16
+  store <4 x float> %vecins5853, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp488 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3073,17 +3073,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5856 = insertelement <4 x float> %tmp489, float %add5855, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5856, <4 x float>* undef, align 16
+  store <4 x float> %vecins5856, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp490 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5858 = fadd float %val, 0x4071666660000000
+  %add5858 = fadd float undef, 0x4071666660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp491 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5859 = insertelement <4 x float> %tmp491, float %add5858, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5859, <4 x float>* undef, align 16
+  store <4 x float> %vecins5859, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp492 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3099,19 +3099,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5901 = insertelement <4 x float> %tmp494, float %add5900, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5901, <4 x float>* undef, align 16
+  store <4 x float> %vecins5901, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add5914 = fadd float %val, 0x40786E6660000000
+  %add5914 = fadd float undef, 0x40786E6660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins5918 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins5918 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5918, <4 x float>* undef, align 16
+  store <4 x float> %vecins5918, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406F266660000000, float 7.900000e+01, float -4.695000e+02, float -4.880000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x406F266660000000, float 7.900000e+01, float -4.695000e+02, float -4.880000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5920 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add5920, <4 x float>* undef, align 16
+  store <4 x float> %add5920, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add5934 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3121,7 +3121,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp495 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp496 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3131,13 +3131,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins5996 = insertelement <4 x float> undef, float %add5995, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins5996, <4 x float>* undef, align 16
+  store <4 x float> %vecins5996, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp497 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext5997 = extractelement <4 x float> %tmp497, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp498 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3149,15 +3149,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6002 = insertelement <4 x float> %tmp499, float %add6001, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6002, <4 x float>* undef, align 16
+  store <4 x float> %vecins6002, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07EA199A0000000, float 0x407DC33340000000, float 0xC0753199A0000000, float -3.895000e+02>, <4 x float>* undef
+  store <4 x float> <float 0xC07EA199A0000000, float 0x407DC33340000000, float 0xC0753199A0000000, float -3.895000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp500 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6004 = fadd <4 x float> undef, %tmp500
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6004, <4 x float>* undef, align 16
+  store <4 x float> %add6004, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp501 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3165,7 +3165,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp502 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6007 = insertelement <4 x float> %tmp502, float %val, i32 0
+  %vecins6007 = insertelement <4 x float> %tmp502, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp503 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3173,9 +3173,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp504 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6024 = insertelement <4 x float> %tmp504, float %val, i32 1
+  %vecins6024 = insertelement <4 x float> %tmp504, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6024, <4 x float>* undef, align 16
+  store <4 x float> %vecins6024, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp505 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3187,7 +3187,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6027 = insertelement <4 x float> %tmp506, float %add6026, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6027, <4 x float>* undef, align 16
+  store <4 x float> %vecins6027, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6028 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3197,15 +3197,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6030 = insertelement <4 x float> %tmp507, float %add6029, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6030, <4 x float>* undef, align 16
+  store <4 x float> %vecins6030, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0527999A0000000, float 0xC06AD999A0000000, float 0x3FF6666660000000, float 0xC03F666660000000>, <4 x float>* undef
+  store <4 x float> <float 0xC0527999A0000000, float 0xC06AD999A0000000, float 0x3FF6666660000000, float 0xC03F666660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp508 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp509 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp510 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3213,7 +3213,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp511 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6036 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3221,17 +3221,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6038 = insertelement <4 x float> undef, float %add6037, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6038, <4 x float>* undef, align 16
+  store <4 x float> %vecins6038, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp512 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6040 = fadd float %val, 0x4071ECCCC0000000
+  %add6040 = fadd float undef, 0x4071ECCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp513 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6041 = insertelement <4 x float> %tmp513, float %add6040, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6041, <4 x float>* undef, align 16
+  store <4 x float> %vecins6041, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp514 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3243,9 +3243,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6044 = insertelement <4 x float> %tmp515, float %add6043, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6044, <4 x float>* undef, align 16
+  store <4 x float> %vecins6044, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC065FCCCC0000000, float 0x40767CCCC0000000, float 0x4079D4CCC0000000, float 0xC07314CCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC065FCCCC0000000, float 0x40767CCCC0000000, float 0x4079D4CCC0000000, float 0xC07314CCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp516 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3253,15 +3253,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6046 = fadd <4 x float> %tmp517, %tmp516
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6046, <4 x float>* undef, align 16
+  store <4 x float> %add6046, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6047 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp518 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6049 = insertelement <4 x float> %tmp518, float %val, i32 0
+  %vecins6049 = insertelement <4 x float> %tmp518, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6049, <4 x float>* undef, align 16
+  store <4 x float> %vecins6049, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp519 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3269,19 +3269,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6051 = fadd float %vecext6050, 0x407E4E6660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6055 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins6055 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6056 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp520 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6061 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp521 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp522 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3295,9 +3295,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6072 = insertelement <4 x float> undef, float %add6071, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6072, <4 x float>* undef, align 16
+  store <4 x float> %vecins6072, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40546CCCC0000000, float 0x4067D66660000000, float 0xC060E33340000000, float 0x4061533340000000>, <4 x float>* undef
+  store <4 x float> <float 0x40546CCCC0000000, float 0x4067D66660000000, float 0xC060E33340000000, float 0x4061533340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp523 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3305,7 +3305,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6074 = fadd <4 x float> %tmp524, %tmp523
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6074, <4 x float>* undef, align 16
+  store <4 x float> %add6074, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp525 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3317,23 +3317,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6077 = insertelement <4 x float> %tmp526, float %add6076, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6077, <4 x float>* undef, align 16
+  store <4 x float> %vecins6077, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp527 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6079 = fadd float %val, 0xC07E9B3340000000
+  %add6079 = fadd float undef, 0xC07E9B3340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp528 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp529 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6082 = fadd float %val, 0x407DCE6660000000
+  %add6082 = fadd float undef, 0x407DCE6660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6083 = insertelement <4 x float> undef, float %add6082, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6083, <4 x float>* undef, align 16
+  store <4 x float> %vecins6083, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp530 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3343,9 +3343,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6086 = insertelement <4 x float> undef, float %add6085, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6086, <4 x float>* undef, align 16
+  store <4 x float> %vecins6086, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4055C66660000000, float 0x40735199A0000000, float 0xC0713199A0000000, float 0x40729B3340000000>, <4 x float>* undef
+  store <4 x float> <float 0x4055C66660000000, float 0x40735199A0000000, float 0xC0713199A0000000, float 0x40729B3340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp531 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3353,19 +3353,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6088 = fadd <4 x float> %tmp532, %tmp531
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6088, <4 x float>* undef, align 16
+  store <4 x float> %add6088, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp533 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6089 = extractelement <4 x float> %tmp533, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6107 = fadd float %val, 0xC06A166660000000
+  %add6107 = fadd float undef, 0xC06A166660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp534 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6108 = insertelement <4 x float> %tmp534, float %add6107, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6108, <4 x float>* undef, align 16
+  store <4 x float> %vecins6108, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp535 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3375,7 +3375,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp536 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp537 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3395,7 +3395,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6119 = insertelement <4 x float> %tmp540, float %add6118, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6119, <4 x float>* undef, align 16
+  store <4 x float> %vecins6119, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp541 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3407,7 +3407,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6122 = insertelement <4 x float> %tmp542, float %add6121, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6122, <4 x float>* undef, align 16
+  store <4 x float> %vecins6122, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6123 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3415,17 +3415,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp543 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6126 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp544 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6128 = insertelement <4 x float> %tmp544, float %val, i32 3
+  %vecins6128 = insertelement <4 x float> %tmp544, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6128, <4 x float>* undef, align 16
+  store <4 x float> %vecins6128, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -2.980000e+02, float 0xC06F0CCCC0000000, float 0xC054A66660000000, float 0xC040CCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float -2.980000e+02, float 0xC06F0CCCC0000000, float 0xC054A66660000000, float 0xC040CCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp545 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3441,7 +3441,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6133 = insertelement <4 x float> undef, float %add6132, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6133, <4 x float>* undef, align 16
+  store <4 x float> %vecins6133, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6134 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3463,9 +3463,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp551 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6178 = insertelement <4 x float> %tmp551, float %val, i32 1
+  %vecins6178 = insertelement <4 x float> %tmp551, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6178, <4 x float>* undef, align 16
+  store <4 x float> %vecins6178, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp552 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3487,13 +3487,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6184 = insertelement <4 x float> %tmp555, float %add6183, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6184, <4 x float>* undef, align 16
+  store <4 x float> %vecins6184, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp556 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6189 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins6189 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6189, <4 x float>* undef, align 16
+  store <4 x float> %vecins6189, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp557 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3505,7 +3505,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6192 = insertelement <4 x float> %tmp558, float %add6191, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6192, <4 x float>* undef, align 16
+  store <4 x float> %vecins6192, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp559 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3519,7 +3519,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6198 = insertelement <4 x float> %tmp561, float %add6197, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407904CCC0000000, float 0x406A833340000000, float 4.895000e+02, float 0x40648999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x407904CCC0000000, float 0x406A833340000000, float 4.895000e+02, float 0x40648999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp562 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3527,7 +3527,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6200 = fadd <4 x float> %tmp563, %tmp562
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6200, <4 x float>* undef, align 16
+  store <4 x float> %add6200, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp564 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3535,7 +3535,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp565 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6203 = insertelement <4 x float> %tmp565, float %val, i32 0
+  %vecins6203 = insertelement <4 x float> %tmp565, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp566 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3549,9 +3549,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp568 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6209 = insertelement <4 x float> %tmp568, float %val, i32 2
+  %vecins6209 = insertelement <4 x float> %tmp568, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6209, <4 x float>* undef, align 16
+  store <4 x float> %vecins6209, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp569 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3559,7 +3559,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp570 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6219 = fadd float %val, 0xC0596CCCC0000000
+  %add6219 = fadd float undef, 0xC0596CCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp571 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3573,7 +3573,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6228 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6228, <4 x float>* undef, align 16
+  store <4 x float> %add6228, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6229 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3583,7 +3583,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6231 = insertelement <4 x float> %tmp573, float %add6230, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6231, <4 x float>* undef, align 16
+  store <4 x float> %vecins6231, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp574 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3595,7 +3595,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6234 = insertelement <4 x float> %tmp575, float %add6233, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6234, <4 x float>* undef, align 16
+  store <4 x float> %vecins6234, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6235 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3603,13 +3603,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6237 = insertelement <4 x float> undef, float %add6236, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6237, <4 x float>* undef, align 16
+  store <4 x float> %vecins6237, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp576 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6245 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins6245 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6245, <4 x float>* undef, align 16
+  store <4 x float> %vecins6245, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp577 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3619,17 +3619,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp578 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6251 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins6251 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp579 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6253 = fadd float %val, 0xC0692999A0000000
+  %add6253 = fadd float undef, 0xC0692999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6254 = insertelement <4 x float> undef, float %add6253, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6254, <4 x float>* undef, align 16
+  store <4 x float> %vecins6254, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 4.600000e+02, float 0xC0777B3340000000, float 0x40351999A0000000, float 0xC06E433340000000>, <4 x float>* undef
+  store <4 x float> <float 4.600000e+02, float 0xC0777B3340000000, float 0x40351999A0000000, float 0xC06E433340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp580 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3637,7 +3637,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6256 = fadd <4 x float> %tmp581, %tmp580
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6256, <4 x float>* undef, align 16
+  store <4 x float> %add6256, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp582 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3649,7 +3649,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6259 = insertelement <4 x float> %tmp583, float %add6258, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6259, <4 x float>* undef, align 16
+  store <4 x float> %vecins6259, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp584 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3661,7 +3661,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6262 = insertelement <4 x float> %tmp585, float %add6261, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6262, <4 x float>* undef, align 16
+  store <4 x float> %vecins6262, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp586 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3669,9 +3669,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp587 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6265 = insertelement <4 x float> %tmp587, float %val, i32 2
+  %vecins6265 = insertelement <4 x float> %tmp587, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6265, <4 x float>* undef, align 16
+  store <4 x float> %vecins6265, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp588 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3683,9 +3683,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6268 = insertelement <4 x float> %tmp589, float %add6267, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6268, <4 x float>* undef, align 16
+  store <4 x float> %vecins6268, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -3.130000e+02, float 0xC079733340000000, float -4.660000e+02, float 0xC064E66660000000>, <4 x float>* undef
+  store <4 x float> <float -3.130000e+02, float 0xC079733340000000, float -4.660000e+02, float 0xC064E66660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp590 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3693,7 +3693,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6270 = fadd <4 x float> %tmp591, %tmp590
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6270, <4 x float>* undef, align 16
+  store <4 x float> %add6270, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp592 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3705,7 +3705,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6273 = insertelement <4 x float> %tmp593, float %add6272, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6273, <4 x float>* undef, align 16
+  store <4 x float> %vecins6273, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp594 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3717,7 +3717,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6276 = insertelement <4 x float> %tmp595, float %add6275, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6276, <4 x float>* undef, align 16
+  store <4 x float> %vecins6276, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp596 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3729,7 +3729,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6279 = insertelement <4 x float> %tmp597, float %add6278, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6279, <4 x float>* undef, align 16
+  store <4 x float> %vecins6279, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp598 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3739,21 +3739,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6282 = insertelement <4 x float> undef, float %add6281, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6282, <4 x float>* undef, align 16
+  store <4 x float> %vecins6282, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4067ECCCC0000000, float 0xC040CCCCC0000000, float 0xC0762E6660000000, float -4.750000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x4067ECCCC0000000, float 0xC040CCCCC0000000, float 0xC0762E6660000000, float -4.750000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6284 = fadd <4 x float> undef, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6285 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6289 = fadd float %val, 0xC0738999A0000000
+  %add6289 = fadd float undef, 0xC0738999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp599 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6293 = insertelement <4 x float> %tmp599, float %val, i32 2
+  %vecins6293 = insertelement <4 x float> %tmp599, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6293, <4 x float>* undef, align 16
+  store <4 x float> %vecins6293, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp600 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3763,15 +3763,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6296 = insertelement <4 x float> undef, float %add6295, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6296, <4 x float>* undef, align 16
+  store <4 x float> %vecins6296, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40704199A0000000, float 0x40753CCCC0000000, float 0xC07E2199A0000000, float 0xC068833340000000>, <4 x float>* undef
+  store <4 x float> <float 0x40704199A0000000, float 0x40753CCCC0000000, float 0xC07E2199A0000000, float 0xC068833340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp601 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6298 = fadd <4 x float> undef, %tmp601
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6298, <4 x float>* undef, align 16
+  store <4 x float> %add6298, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp602 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3783,7 +3783,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6301 = insertelement <4 x float> %tmp603, float %add6300, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6301, <4 x float>* undef, align 16
+  store <4 x float> %vecins6301, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp604 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3795,7 +3795,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6304 = insertelement <4 x float> %tmp605, float %add6303, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6304, <4 x float>* undef, align 16
+  store <4 x float> %vecins6304, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp606 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3805,7 +3805,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6307 = insertelement <4 x float> undef, float %add6306, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6307, <4 x float>* undef, align 16
+  store <4 x float> %vecins6307, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp607 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3817,9 +3817,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6310 = insertelement <4 x float> %tmp608, float %add6309, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6310, <4 x float>* undef, align 16
+  store <4 x float> %vecins6310, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407A233340000000, float 0x406DA33340000000, float 3.725000e+02, float 0x40761199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x407A233340000000, float 0x406DA33340000000, float 3.725000e+02, float 0x40761199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp609 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3827,7 +3827,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6312 = fadd <4 x float> %tmp610, %tmp609
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6312, <4 x float>* undef, align 16
+  store <4 x float> %add6312, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp611 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3849,13 +3849,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6657 = insertelement <4 x float> %tmp614, float %add6656, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6657, <4 x float>* undef, align 16
+  store <4 x float> %vecins6657, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6660 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins6660 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6660, <4 x float>* undef, align 16
+  store <4 x float> %vecins6660, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC064E33340000000, float 0xC064833340000000, float 0xC0673CCCC0000000, float 0xC074266660000000>, <4 x float>* undef
+  store <4 x float> <float 0xC064E33340000000, float 0xC064833340000000, float 0xC0673CCCC0000000, float 0xC074266660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp615 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3867,7 +3867,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6665 = insertelement <4 x float> %tmp616, float %add6664, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp617 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3875,15 +3875,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp618 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07CC4CCC0000000, float 0x404EE66660000000, float 0xC0754CCCC0000000, float 0xC0744B3340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07CC4CCC0000000, float 0x404EE66660000000, float 0xC0754CCCC0000000, float 0xC0744B3340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp619 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6676 = fadd <4 x float> %tmp619, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6676, <4 x float>* undef, align 16
+  store <4 x float> %add6676, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp620 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3901,7 +3901,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp622 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp623 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3913,7 +3913,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6685 = insertelement <4 x float> %tmp624, float %add6684, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6685, <4 x float>* undef, align 16
+  store <4 x float> %vecins6685, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp625 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3925,15 +3925,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6688 = insertelement <4 x float> %tmp626, float %add6687, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6688, <4 x float>* undef, align 16
+  store <4 x float> %vecins6688, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 7.500000e+00, float 0x4077E33340000000, float 0xC0596CCCC0000000, float 0xC07D4E6660000000>, <4 x float>* undef
+  store <4 x float> <float 7.500000e+00, float 0x4077E33340000000, float 0xC0596CCCC0000000, float 0xC07D4E6660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp627 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6690 = fadd <4 x float> undef, %tmp627
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6690, <4 x float>* undef, align 16
+  store <4 x float> %add6690, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp628 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3945,7 +3945,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6693 = insertelement <4 x float> %tmp629, float %add6692, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6693, <4 x float>* undef, align 16
+  store <4 x float> %vecins6693, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp630 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3957,7 +3957,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6696 = insertelement <4 x float> %tmp631, float %add6695, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6696, <4 x float>* undef, align 16
+  store <4 x float> %vecins6696, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp632 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3969,7 +3969,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6699 = insertelement <4 x float> %tmp633, float %add6698, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6699, <4 x float>* undef, align 16
+  store <4 x float> %vecins6699, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp634 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3981,17 +3981,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6702 = insertelement <4 x float> %tmp635, float %add6701, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6702, <4 x float>* undef, align 16
+  store <4 x float> %vecins6702, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40772CCCC0000000, float 0xC0625CCCC0000000, float 6.200000e+01, float 0xC06ADCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40772CCCC0000000, float 0xC0625CCCC0000000, float 6.200000e+01, float 0xC06ADCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp636 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp637 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6707 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins6707 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6707, <4 x float>* undef, align 16
+  store <4 x float> %vecins6707, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp638 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -3999,7 +3999,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp639 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp640 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4031,21 +4031,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp645 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6726 = fadd float %val, 0x4059B999A0000000
+  %add6726 = fadd float undef, 0x4059B999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp646 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6727 = insertelement <4 x float> %tmp646, float %add6726, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6727, <4 x float>* undef, align 16
+  store <4 x float> %vecins6727, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6728 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6729 = fadd float %vecext6728, 0xC073466660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC0309999A0000000, float -2.715000e+02, float 1.620000e+02, float 0x40674CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC0309999A0000000, float -2.715000e+02, float 1.620000e+02, float 0x40674CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp647 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4053,7 +4053,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6732 = fadd <4 x float> %tmp648, %tmp647
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6732, <4 x float>* undef, align 16
+  store <4 x float> %add6732, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp649 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4065,7 +4065,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6735 = insertelement <4 x float> %tmp650, float %add6734, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6735, <4 x float>* undef, align 16
+  store <4 x float> %vecins6735, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp651 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4077,7 +4077,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6738 = insertelement <4 x float> %tmp652, float %add6737, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6738, <4 x float>* undef, align 16
+  store <4 x float> %vecins6738, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp653 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4089,7 +4089,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6741 = insertelement <4 x float> %tmp654, float %add6740, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6741, <4 x float>* undef, align 16
+  store <4 x float> %vecins6741, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp655 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4101,7 +4101,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6744 = insertelement <4 x float> %tmp656, float %add6743, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6744, <4 x float>* undef, align 16
+  store <4 x float> %vecins6744, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp657 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4109,21 +4109,21 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6746 = fadd <4 x float> %tmp658, %tmp657
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6746, <4 x float>* undef, align 16
+  store <4 x float> %add6746, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp659 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6749 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins6749 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6749, <4 x float>* undef, align 16
+  store <4 x float> %vecins6749, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp660 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6751 = fadd float %val, 0x4075DE6660000000
+  %add6751 = fadd float undef, 0x4075DE6660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6752 = insertelement <4 x float> undef, float %add6751, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6752, <4 x float>* undef, align 16
+  store <4 x float> %vecins6752, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp661 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4133,7 +4133,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6755 = insertelement <4 x float> undef, float %add6754, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6755, <4 x float>* undef, align 16
+  store <4 x float> %vecins6755, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp662 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4145,15 +4145,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6758 = insertelement <4 x float> %tmp663, float %add6757, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6758, <4 x float>* undef, align 16
+  store <4 x float> %vecins6758, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x403D1999A0000000, float 0xC05F533340000000, float 3.945000e+02, float 3.950000e+01>, <4 x float>* undef
+  store <4 x float> <float 0x403D1999A0000000, float 0xC05F533340000000, float 3.945000e+02, float 3.950000e+01>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp664 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6760 = fadd <4 x float> undef, %tmp664
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6760, <4 x float>* undef, align 16
+  store <4 x float> %add6760, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp665 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4165,9 +4165,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp666 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC079BE6660000000, float 4.930000e+02, float 0x406CC33340000000, float 0xC062E999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC079BE6660000000, float 4.930000e+02, float 0x406CC33340000000, float 0xC062E999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp667 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4183,7 +4183,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6777 = insertelement <4 x float> %tmp669, float %add6776, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6777, <4 x float>* undef, align 16
+  store <4 x float> %vecins6777, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp670 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4195,9 +4195,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6784 = extractelement <4 x float> %tmp671, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6875 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins6875 = insertelement <4 x float> undef, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6875, <4 x float>* undef, align 16
+  store <4 x float> %vecins6875, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp672 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4207,15 +4207,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6878 = insertelement <4 x float> undef, float %add6877, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6878, <4 x float>* undef, align 16
+  store <4 x float> %vecins6878, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6888 = fadd float %val, 0x4057CCCCC0000000
+  %add6888 = fadd float undef, 0x4057CCCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp673 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6889 = insertelement <4 x float> %tmp673, float %add6888, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6889, <4 x float>* undef, align 16
+  store <4 x float> %vecins6889, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp674 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4227,7 +4227,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6892 = insertelement <4 x float> %tmp675, float %add6891, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6892, <4 x float>* undef, align 16
+  store <4 x float> %vecins6892, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp676 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4239,7 +4239,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6895 = insertelement <4 x float> %tmp677, float %add6894, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6895, <4 x float>* undef, align 16
+  store <4 x float> %vecins6895, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp678 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4249,7 +4249,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6900 = fadd <4 x float> %tmp680, %tmp679
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6900, <4 x float>* undef, align 16
+  store <4 x float> %add6900, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp681 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4261,9 +4261,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6903 = insertelement <4 x float> %tmp682, float %add6902, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6903, <4 x float>* undef, align 16
+  store <4 x float> %vecins6903, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6905 = fadd float %val, 0x4031B33340000000
+  %add6905 = fadd float undef, 0x4031B33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp683 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4271,9 +4271,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp684 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6912 = insertelement <4 x float> %tmp684, float %val, i32 3
+  %vecins6912 = insertelement <4 x float> %tmp684, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 3.315000e+02, float 0xC066C999A0000000, float 0xC061F33340000000, float 0x4071166660000000>, <4 x float>* undef
+  store <4 x float> <float 3.315000e+02, float 0xC066C999A0000000, float 0xC061F33340000000, float 0x4071166660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp685 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4281,13 +4281,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6914 = fadd <4 x float> %tmp686, %tmp685
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6914, <4 x float>* undef, align 16
+  store <4 x float> %add6914, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6915 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6920 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins6920 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6920, <4 x float>* undef, align 16
+  store <4 x float> %vecins6920, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext6921 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4295,11 +4295,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp687 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6926 = insertelement <4 x float> %tmp687, float %val, i32 3
+  %vecins6926 = insertelement <4 x float> %tmp687, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6926, <4 x float>* undef, align 16
+  store <4 x float> %vecins6926, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC03C4CCCC0000000, float 0xC07E5199A0000000, float -8.250000e+01, float 0xC043B33340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC03C4CCCC0000000, float 0xC07E5199A0000000, float -8.250000e+01, float 0xC043B33340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp688 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4307,13 +4307,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6928 = fadd <4 x float> %tmp689, %tmp688
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6928, <4 x float>* undef, align 16
+  store <4 x float> %add6928, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6930 = fadd float %val, -4.590000e+02
+  %add6930 = fadd float undef, -4.590000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6931 = insertelement <4 x float> undef, float %add6930, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6931, <4 x float>* undef, align 16
+  store <4 x float> %vecins6931, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp690 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4323,7 +4323,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp691 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp692 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4349,15 +4349,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp695 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6950 = fadd float %val, 0xC078F33340000000
+  %add6950 = fadd float undef, 0xC078F33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp696 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6951 = insertelement <4 x float> %tmp696, float %add6950, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6951, <4 x float>* undef, align 16
+  store <4 x float> %vecins6951, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp697 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4369,7 +4369,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6954 = insertelement <4 x float> %tmp698, float %add6953, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6954, <4 x float>* undef, align 16
+  store <4 x float> %vecins6954, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp699 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4377,7 +4377,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6956 = fadd <4 x float> %tmp700, %tmp699
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6956, <4 x float>* undef, align 16
+  store <4 x float> %add6956, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp701 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4389,7 +4389,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6959 = insertelement <4 x float> %tmp702, float %add6958, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6959, <4 x float>* undef, align 16
+  store <4 x float> %vecins6959, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp703 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4401,15 +4401,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6965 = insertelement <4 x float> %tmp704, float %add6964, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6965, <4 x float>* undef, align 16
+  store <4 x float> %vecins6965, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add6975 = fadd float %val, 0x406AF33340000000
+  %add6975 = fadd float undef, 0x406AF33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp705 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6976 = insertelement <4 x float> %tmp705, float %add6975, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6976, <4 x float>* undef, align 16
+  store <4 x float> %vecins6976, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp706 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4417,7 +4417,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6984 = fadd <4 x float> %tmp707, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6984, <4 x float>* undef, align 16
+  store <4 x float> %add6984, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp708 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4429,7 +4429,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins6987 = insertelement <4 x float> %tmp709, float %add6986, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6987, <4 x float>* undef, align 16
+  store <4 x float> %vecins6987, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp710 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4439,11 +4439,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp711 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins6996 = insertelement <4 x float> %tmp711, float %val, i32 3
+  %vecins6996 = insertelement <4 x float> %tmp711, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins6996, <4 x float>* undef, align 16
+  store <4 x float> %vecins6996, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4077A4CCC0000000, float 0xC0757199A0000000, float 0xC072F4CCC0000000, float 0xC071DCCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4077A4CCC0000000, float 0xC0757199A0000000, float 0xC072F4CCC0000000, float 0xC071DCCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp712 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4451,7 +4451,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add6998 = fadd <4 x float> %tmp713, %tmp712
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add6998, <4 x float>* undef, align 16
+  store <4 x float> %add6998, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp714 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4463,7 +4463,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7001 = insertelement <4 x float> %tmp715, float %add7000, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7001, <4 x float>* undef, align 16
+  store <4 x float> %vecins7001, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp716 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4475,11 +4475,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7004 = insertelement <4 x float> %tmp717, float %add7003, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp718 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7140 = fadd float %val, 0x403D333340000000
+  %add7140 = fadd float undef, 0x403D333340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7141 = insertelement <4 x float> undef, float %add7140, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4489,7 +4489,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7144 = insertelement <4 x float> undef, float %add7143, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp719 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4501,15 +4501,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7150 = insertelement <4 x float> %tmp720, float %add7149, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7150, <4 x float>* undef, align 16
+  store <4 x float> %vecins7150, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 1.700000e+02, float 0xC077B4CCC0000000, float 0x40625999A0000000, float 0x406C166660000000>, <4 x float>* undef
+  store <4 x float> <float 1.700000e+02, float 0xC077B4CCC0000000, float 0x40625999A0000000, float 0x406C166660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp721 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7152 = fadd <4 x float> %tmp721, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add7152, <4 x float>* undef, align 16
+  store <4 x float> %add7152, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext7156 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4519,7 +4519,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7158 = insertelement <4 x float> %tmp722, float %add7157, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7158, <4 x float>* undef, align 16
+  store <4 x float> %vecins7158, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp723 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4531,13 +4531,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7161 = insertelement <4 x float> %tmp724, float %add7160, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7161, <4 x float>* undef, align 16
+  store <4 x float> %vecins7161, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7168 = fadd float %val, 0xC072F199A0000000
+  %add7168 = fadd float undef, 0xC072F199A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp725 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext7170 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4545,11 +4545,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7172 = insertelement <4 x float> undef, float %add7171, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7172, <4 x float>* undef, align 16
+  store <4 x float> %vecins7172, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext7173 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp726 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4559,7 +4559,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7421 = insertelement <4 x float> undef, float %add7420, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7421, <4 x float>* undef, align 16
+  store <4 x float> %vecins7421, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp727 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4571,7 +4571,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7424 = insertelement <4 x float> %tmp728, float %add7423, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7424, <4 x float>* undef, align 16
+  store <4 x float> %vecins7424, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp729 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4583,11 +4583,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7427 = insertelement <4 x float> %tmp730, float %add7426, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7427, <4 x float>* undef, align 16
+  store <4 x float> %vecins7427, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext7428 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp731 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4599,9 +4599,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7570 = insertelement <4 x float> %tmp732, float %add7569, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7570, <4 x float>* undef, align 16
+  store <4 x float> %vecins7570, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40745199A0000000, float 0xC0411999A0000000, float -5.650000e+01, float -4.005000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x40745199A0000000, float 0xC0411999A0000000, float -5.650000e+01, float -4.005000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp733 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4609,7 +4609,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7572 = fadd <4 x float> %tmp734, %tmp733
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add7572, <4 x float>* undef, align 16
+  store <4 x float> %add7572, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext7573 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4619,11 +4619,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7575 = insertelement <4 x float> %tmp735, float %add7574, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7575, <4 x float>* undef, align 16
+  store <4 x float> %vecins7575, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp736 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7577 = fadd float %val, 0xC051666660000000
+  %add7577 = fadd float undef, 0xC051666660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp737 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4635,7 +4635,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7581 = insertelement <4 x float> undef, float %add7580, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7581, <4 x float>* undef, align 16
+  store <4 x float> %vecins7581, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp739 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4647,7 +4647,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7584 = insertelement <4 x float> %tmp740, float %add7583, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC057533340000000, float 0x4060A33340000000, float 0x40791E6660000000, float 2.455000e+02>, <4 x float>* undef
+  store <4 x float> <float 0xC057533340000000, float 0x4060A33340000000, float 0x40791E6660000000, float 2.455000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp741 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4655,7 +4655,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7586 = fadd <4 x float> %tmp742, %tmp741
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add7586, <4 x float>* undef, align 16
+  store <4 x float> %add7586, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp743 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4665,7 +4665,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp744 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp745 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4677,15 +4677,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7592 = insertelement <4 x float> %tmp746, float %add7591, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7592, <4 x float>* undef, align 16
+  store <4 x float> %vecins7592, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp747 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext7593 = extractelement <4 x float> %tmp747, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins7595 = insertelement <4 x float> undef, float %val, i32 2
+  %vecins7595 = insertelement <4 x float> undef, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7595, <4 x float>* undef, align 16
+  store <4 x float> %vecins7595, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp748 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4693,17 +4693,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7597 = fadd float %vecext7596, 0x407E666660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406A766660000000, float 0xBFC99999A0000000, float 0xC0751B3340000000, float -4.075000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x406A766660000000, float 0xBFC99999A0000000, float 0xC0751B3340000000, float -4.075000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp749 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7616 = fadd float %val, 0xC04DE66660000000
+  %add7616 = fadd float undef, 0xC04DE66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp750 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7617 = insertelement <4 x float> %tmp750, float %add7616, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7617, <4 x float>* undef, align 16
+  store <4 x float> %vecins7617, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp751 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4715,17 +4715,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7620 = insertelement <4 x float> %tmp752, float %add7619, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7620, <4 x float>* undef, align 16
+  store <4 x float> %vecins7620, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp753 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7622 = fadd float %val, 0xC054B999A0000000
+  %add7622 = fadd float undef, 0xC054B999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp754 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins7626 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins7626 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7626, <4 x float>* undef, align 16
+  store <4 x float> %vecins7626, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp755 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4733,7 +4733,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7628 = fadd <4 x float> %tmp756, %tmp755
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add7628, <4 x float>* undef, align 16
+  store <4 x float> %add7628, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp757 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4745,13 +4745,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7631 = insertelement <4 x float> %tmp758, float %add7630, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7639 = fadd float %val, 0x407C5999A0000000
+  %add7639 = fadd float undef, 0x407C5999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp759 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7640 = insertelement <4 x float> %tmp759, float %add7639, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406AA66660000000, float 0x4067C66660000000, float 0xC054866660000000, float -2.400000e+01>, <4 x float>* undef
+  store <4 x float> <float 0x406AA66660000000, float 0x4067C66660000000, float 0xC054866660000000, float -2.400000e+01>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp760 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4759,9 +4759,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp761 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7644 = fadd float %val, 0xC0758999A0000000
+  %add7644 = fadd float undef, 0xC0758999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp762 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4773,7 +4773,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7648 = insertelement <4 x float> %tmp763, float %add7647, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7648, <4 x float>* undef, align 16
+  store <4 x float> %vecins7648, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp764 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4785,7 +4785,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7651 = insertelement <4 x float> %tmp765, float %add7650, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7651, <4 x float>* undef, align 16
+  store <4 x float> %vecins7651, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp766 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4797,7 +4797,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7654 = insertelement <4 x float> %tmp767, float %add7653, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7654, <4 x float>* undef, align 16
+  store <4 x float> %vecins7654, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp768 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4805,7 +4805,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7656 = fadd <4 x float> %tmp769, %tmp768
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add7656, <4 x float>* undef, align 16
+  store <4 x float> %add7656, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp770 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4817,7 +4817,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7659 = insertelement <4 x float> %tmp771, float %add7658, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7659, <4 x float>* undef, align 16
+  store <4 x float> %vecins7659, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp772 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4829,7 +4829,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7662 = insertelement <4 x float> %tmp773, float %add7661, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7662, <4 x float>* undef, align 16
+  store <4 x float> %vecins7662, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp774 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4841,7 +4841,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7665 = insertelement <4 x float> %tmp775, float %add7664, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7665, <4 x float>* undef, align 16
+  store <4 x float> %vecins7665, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp776 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4851,7 +4851,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7668 = insertelement <4 x float> undef, float %add7667, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7668, <4 x float>* undef, align 16
+  store <4 x float> %vecins7668, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp777 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4873,23 +4873,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp781 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp782 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add7731 = fadd float %val, 1.900000e+02
+  %add7731 = fadd float undef, 1.900000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp783 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins7732 = insertelement <4 x float> %tmp783, float %add7731, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7732, <4 x float>* undef, align 16
+  store <4 x float> %vecins7732, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp784 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins7735 = insertelement <4 x float> %tmp784, float %val, i32 2
+  %vecins7735 = insertelement <4 x float> %tmp784, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7735, <4 x float>* undef, align 16
+  store <4 x float> %vecins7735, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp785 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4897,11 +4897,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7737 = fadd float %vecext7736, 0xC06AF66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins7850 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins7850 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins7850, <4 x float>* undef, align 16
+  store <4 x float> %vecins7850, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4062A33340000000, float 2.290000e+02, float 0x40509999A0000000, float 0xC078BE6660000000>, <4 x float>* undef
+  store <4 x float> <float 0x4062A33340000000, float 2.290000e+02, float 0x40509999A0000000, float 0xC078BE6660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp786 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4909,7 +4909,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add7852 = fadd <4 x float> %tmp787, %tmp786
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add7852, <4 x float>* undef, align 16
+  store <4 x float> %add7852, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp788 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4921,13 +4921,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9398 = insertelement <4 x float> %tmp789, float %add9397, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9398, <4 x float>* undef, align 16
+  store <4 x float> %vecins9398, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9399 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp790 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9401 = insertelement <4 x float> %tmp790, float %val, i32 2
+  %vecins9401 = insertelement <4 x float> %tmp790, float undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp791 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4939,11 +4939,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9404 = insertelement <4 x float> %tmp792, float %add9403, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9404, <4 x float>* undef, align 16
+  store <4 x float> %vecins9404, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp793 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp794 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4959,7 +4959,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp796 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp797 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4971,7 +4971,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9415 = insertelement <4 x float> %tmp798, float %add9414, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9415, <4 x float>* undef, align 16
+  store <4 x float> %vecins9415, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp799 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4983,9 +4983,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9418 = insertelement <4 x float> %tmp800, float %add9417, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9418, <4 x float>* undef, align 16
+  store <4 x float> %vecins9418, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 3.555000e+02, float 0xC062E33340000000, float 0x4065C66660000000, float -3.645000e+02>, <4 x float>* undef
+  store <4 x float> <float 3.555000e+02, float 0xC062E33340000000, float 0x4065C66660000000, float -3.645000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp801 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -4993,7 +4993,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9420 = fadd <4 x float> %tmp802, %tmp801
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9420, <4 x float>* undef, align 16
+  store <4 x float> %add9420, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp803 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5001,9 +5001,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp804 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9423 = insertelement <4 x float> %tmp804, float %val, i32 0
+  %vecins9423 = insertelement <4 x float> %tmp804, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9423, <4 x float>* undef, align 16
+  store <4 x float> %vecins9423, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp805 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5015,17 +5015,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9426 = insertelement <4 x float> %tmp806, float %add9425, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9426, <4 x float>* undef, align 16
+  store <4 x float> %vecins9426, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp807 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9428 = fadd float %val, 0xC065466660000000
+  %add9428 = fadd float undef, 0xC065466660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp808 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9429 = insertelement <4 x float> %tmp808, float %add9428, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9429, <4 x float>* undef, align 16
+  store <4 x float> %vecins9429, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp809 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5037,7 +5037,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9432 = insertelement <4 x float> %tmp810, float %add9431, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC07C7E6660000000, float 1.205000e+02, float 0x4050D999A0000000, float 0xC06B233340000000>, <4 x float>* undef
+  store <4 x float> <float 0xC07C7E6660000000, float 1.205000e+02, float 0x4050D999A0000000, float 0xC06B233340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp811 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5045,7 +5045,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9434 = fadd <4 x float> %tmp812, %tmp811
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9436 = fadd float %val, -3.185000e+02
+  %add9436 = fadd float undef, -3.185000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp813 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5053,7 +5053,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp814 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp815 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5065,7 +5065,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9443 = insertelement <4 x float> %tmp816, float %add9442, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9443, <4 x float>* undef, align 16
+  store <4 x float> %vecins9443, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp817 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5077,7 +5077,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9446 = insertelement <4 x float> %tmp818, float %add9445, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9446, <4 x float>* undef, align 16
+  store <4 x float> %vecins9446, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp819 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5085,23 +5085,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9448 = fadd <4 x float> %tmp820, %tmp819
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9448, <4 x float>* undef, align 16
+  store <4 x float> %add9448, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9450 = fadd float %val, 0xC0718199A0000000
+  %add9450 = fadd float undef, 0xC0718199A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp821 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9451 = insertelement <4 x float> %tmp821, float %add9450, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9451, <4 x float>* undef, align 16
+  store <4 x float> %vecins9451, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp822 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp823 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9454 = insertelement <4 x float> %tmp823, float %val, i32 1
+  %vecins9454 = insertelement <4 x float> %tmp823, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9454, <4 x float>* undef, align 16
+  store <4 x float> %vecins9454, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp824 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5113,23 +5113,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9457 = insertelement <4 x float> %tmp825, float %add9456, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9457, <4 x float>* undef, align 16
+  store <4 x float> %vecins9457, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9458 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp826 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9460 = insertelement <4 x float> %tmp826, float %val, i32 3
+  %vecins9460 = insertelement <4 x float> %tmp826, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9460, <4 x float>* undef, align 16
+  store <4 x float> %vecins9460, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407B5E6660000000, float 0x40648999A0000000, float 0xC06B966660000000, float 0x40341999A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x407B5E6660000000, float 0x40648999A0000000, float 0xC06B966660000000, float 0x40341999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp827 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9462 = fadd <4 x float> %tmp827, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9462, <4 x float>* undef, align 16
+  store <4 x float> %add9462, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp828 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5137,23 +5137,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp829 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9465 = insertelement <4 x float> %tmp829, float %val, i32 0
+  %vecins9465 = insertelement <4 x float> %tmp829, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9467 = fadd float %val, 0x405D666660000000
+  %add9467 = fadd float undef, 0x405D666660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp830 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9468 = insertelement <4 x float> %tmp830, float %add9467, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9468, <4 x float>* undef, align 16
+  store <4 x float> %vecins9468, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp831 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9470 = fadd float %val, 0x4077033340000000
+  %add9470 = fadd float undef, 0x4077033340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp832 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9472 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5163,9 +5163,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9474 = insertelement <4 x float> %tmp833, float %add9473, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9474, <4 x float>* undef, align 16
+  store <4 x float> %vecins9474, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x404F733340000000, float 0x407AB4CCC0000000, float 0x40605999A0000000, float 0xC03E4CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x404F733340000000, float 0x407AB4CCC0000000, float 0x40605999A0000000, float 0xC03E4CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp834 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5173,7 +5173,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9476 = fadd <4 x float> %tmp835, %tmp834
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9476, <4 x float>* undef, align 16
+  store <4 x float> %add9476, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp836 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5185,17 +5185,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9479 = insertelement <4 x float> %tmp837, float %add9478, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9479, <4 x float>* undef, align 16
+  store <4 x float> %vecins9479, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp838 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9481 = fadd float %val, 0x407BE33340000000
+  %add9481 = fadd float undef, 0x407BE33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp839 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9482 = insertelement <4 x float> %tmp839, float %add9481, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9482, <4 x float>* undef, align 16
+  store <4 x float> %vecins9482, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9483 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5205,7 +5205,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9485 = insertelement <4 x float> %tmp840, float %add9484, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9485, <4 x float>* undef, align 16
+  store <4 x float> %vecins9485, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp841 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5215,13 +5215,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp842 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC076B999A0000000, float 0xC0706CCCC0000000, float 0x407904CCC0000000, float 0x407EE199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0xC076B999A0000000, float 0xC0706CCCC0000000, float 0x407904CCC0000000, float 0x407EE199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp843 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp844 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5229,15 +5229,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9492 = fadd float %vecext9491, 0x407C166660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9495 = fadd float %val, 0x407DBB3340000000
+  %add9495 = fadd float undef, 0x407DBB3340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp845 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9496 = insertelement <4 x float> %tmp845, float %add9495, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9496, <4 x float>* undef, align 16
+  store <4 x float> %vecins9496, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp846 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5249,41 +5249,41 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9499 = insertelement <4 x float> %tmp847, float %add9498, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9499, <4 x float>* undef, align 16
+  store <4 x float> %vecins9499, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp848 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9501 = fadd float %val, 0x407D5CCCC0000000
+  %add9501 = fadd float undef, 0x407D5CCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp849 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9502 = insertelement <4 x float> %tmp849, float %add9501, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9502, <4 x float>* undef, align 16
+  store <4 x float> %vecins9502, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp850 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9504 = fadd <4 x float> %tmp850, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9504, <4 x float>* undef, align 16
+  store <4 x float> %add9504, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp851 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9506 = fadd float %val, 0x4076EE6660000000
+  %add9506 = fadd float undef, 0x4076EE6660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp852 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9507 = insertelement <4 x float> %tmp852, float %add9506, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9507, <4 x float>* undef, align 16
+  store <4 x float> %vecins9507, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp853 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9509 = fadd float %val, 0xC0535999A0000000
+  %add9509 = fadd float undef, 0xC0535999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp854 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp855 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5295,7 +5295,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9513 = insertelement <4 x float> %tmp856, float %add9512, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9513, <4 x float>* undef, align 16
+  store <4 x float> %vecins9513, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp857 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5303,11 +5303,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp858 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9516 = insertelement <4 x float> %tmp858, float %val, i32 3
+  %vecins9516 = insertelement <4 x float> %tmp858, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9516, <4 x float>* undef, align 16
+  store <4 x float> %vecins9516, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x407254CCC0000000, float 0x407844CCC0000000, float 0xC04D9999A0000000, float 0xC0550CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x407254CCC0000000, float 0x407844CCC0000000, float 0xC04D9999A0000000, float 0xC0550CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp859 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5319,9 +5319,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp862 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9521 = insertelement <4 x float> %tmp862, float %val, i32 0
+  %vecins9521 = insertelement <4 x float> %tmp862, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9521, <4 x float>* undef, align 16
+  store <4 x float> %vecins9521, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp863 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5333,25 +5333,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9524 = insertelement <4 x float> %tmp864, float %add9523, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9524, <4 x float>* undef, align 16
+  store <4 x float> %vecins9524, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp865 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9526 = fadd float %val, 0x4072833340000000
+  %add9526 = fadd float undef, 0x4072833340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp866 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9527 = insertelement <4 x float> %tmp866, float %add9526, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9527, <4 x float>* undef, align 16
+  store <4 x float> %vecins9527, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp867 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9530 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins9530 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9530, <4 x float>* undef, align 16
+  store <4 x float> %vecins9530, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4072F4CCC0000000, float 0x4065CCCCC0000000, float 0x4051D33340000000, float 0x40680CCCC0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4072F4CCC0000000, float 0x4065CCCCC0000000, float 0x4051D33340000000, float 0x40680CCCC0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp868 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5363,9 +5363,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp870 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9535 = insertelement <4 x float> %tmp870, float %val, i32 0
+  %vecins9535 = insertelement <4 x float> %tmp870, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9535, <4 x float>* undef, align 16
+  store <4 x float> %vecins9535, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp871 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5377,7 +5377,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9538 = insertelement <4 x float> %tmp872, float %add9537, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9538, <4 x float>* undef, align 16
+  store <4 x float> %vecins9538, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp873 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5385,17 +5385,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9543 = fadd float %vecext9542, 0x4050D999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9576 = fadd float %val, 0x40219999A0000000
+  %add9576 = fadd float undef, 0x40219999A0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9577 = insertelement <4 x float> undef, float %add9576, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9577, <4 x float>* undef, align 16
+  store <4 x float> %vecins9577, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp874 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9580 = insertelement <4 x float> undef, float %val, i32 1
+  %vecins9580 = insertelement <4 x float> undef, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9580, <4 x float>* undef, align 16
+  store <4 x float> %vecins9580, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp875 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5407,11 +5407,11 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9583 = insertelement <4 x float> %tmp876, float %add9582, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9583, <4 x float>* undef, align 16
+  store <4 x float> %vecins9583, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp877 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9673 = extractelement <4 x float> undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5421,7 +5421,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9675 = insertelement <4 x float> %tmp878, float %add9674, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9675, <4 x float>* undef, align 16
+  store <4 x float> %vecins9675, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9676 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5441,7 +5441,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9681 = insertelement <4 x float> %tmp881, float %add9680, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9681, <4 x float>* undef, align 16
+  store <4 x float> %vecins9681, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp882 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5451,7 +5451,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9686 = fadd <4 x float> %tmp883, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9686, <4 x float>* undef, align 16
+  store <4 x float> %add9686, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp884 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5481,19 +5481,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9695 = insertelement <4 x float> %tmp888, float %add9694, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9695, <4 x float>* undef, align 16
+  store <4 x float> %vecins9695, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp889 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9697 = fadd float %val, 0x4058D33340000000
+  %add9697 = fadd float undef, 0x4058D33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp890 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9698 = insertelement <4 x float> %tmp890, float %add9697, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9698, <4 x float>* undef, align 16
+  store <4 x float> %vecins9698, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4062CCCCC0000000, float 0x407AD999A0000000, float 0x40582CCCC0000000, float 0xC0712B3340000000>, <4 x float>* undef
+  store <4 x float> <float 0x4062CCCCC0000000, float 0x407AD999A0000000, float 0x40582CCCC0000000, float 0xC0712B3340000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp891 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5509,7 +5509,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9703 = insertelement <4 x float> %tmp893, float %add9702, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9703, <4 x float>* undef, align 16
+  store <4 x float> %vecins9703, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp894 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5521,7 +5521,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9706 = insertelement <4 x float> %tmp895, float %add9705, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9706, <4 x float>* undef, align 16
+  store <4 x float> %vecins9706, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9707 = extractelement <4 x float> undef, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5531,23 +5531,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9709 = insertelement <4 x float> %tmp896, float %add9708, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9709, <4 x float>* undef, align 16
+  store <4 x float> %vecins9709, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp897 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9710 = extractelement <4 x float> %tmp897, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9712 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins9712 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9712, <4 x float>* undef, align 16
+  store <4 x float> %vecins9712, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4069F33340000000, float 0xC048266660000000, float 0x40638CCCC0000000, float 0xC07EC199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4069F33340000000, float 0xC048266660000000, float 0x40638CCCC0000000, float 0xC07EC199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp898 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9714 = fadd <4 x float> undef, %tmp898
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9714, <4 x float>* undef, align 16
+  store <4 x float> %add9714, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp899 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5555,9 +5555,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp900 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9717 = insertelement <4 x float> %tmp900, float %val, i32 0
+  %vecins9717 = insertelement <4 x float> %tmp900, float undef, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9717, <4 x float>* undef, align 16
+  store <4 x float> %vecins9717, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp901 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5569,7 +5569,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9720 = insertelement <4 x float> %tmp902, float %add9719, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9720, <4 x float>* undef, align 16
+  store <4 x float> %vecins9720, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp903 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5581,7 +5581,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9723 = insertelement <4 x float> %tmp904, float %add9722, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9723, <4 x float>* undef, align 16
+  store <4 x float> %vecins9723, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp905 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5593,15 +5593,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9726 = insertelement <4 x float> %tmp906, float %add9725, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9726, <4 x float>* undef, align 16
+  store <4 x float> %vecins9726, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -4.575000e+02, float 0x40713E6660000000, float 0x407D133340000000, float -1.425000e+02>, <4 x float>* undef
+  store <4 x float> <float -4.575000e+02, float 0x40713E6660000000, float 0x407D133340000000, float -1.425000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp907 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9728 = fadd <4 x float> %tmp907, undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9728, <4 x float>* undef, align 16
+  store <4 x float> %add9728, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp908 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5613,17 +5613,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9731 = insertelement <4 x float> %tmp909, float %add9730, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9731, <4 x float>* undef, align 16
+  store <4 x float> %vecins9731, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp910 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9733 = fadd float %val, 0xC050F33340000000
+  %add9733 = fadd float undef, 0xC050F33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp911 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9734 = insertelement <4 x float> %tmp911, float %add9733, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9734, <4 x float>* undef, align 16
+  store <4 x float> %vecins9734, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp912 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5635,23 +5635,23 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9737 = insertelement <4 x float> %tmp913, float %add9736, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9737, <4 x float>* undef, align 16
+  store <4 x float> %vecins9737, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp914 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9738 = extractelement <4 x float> %tmp914, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9740 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins9740 = insertelement <4 x float> undef, float undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9740, <4 x float>* undef, align 16
+  store <4 x float> %vecins9740, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 2.150000e+02, float 0x405A2CCCC0000000, float 2.310000e+02, float 0x404E1999A0000000>, <4 x float>* undef
+  store <4 x float> <float 2.150000e+02, float 0x405A2CCCC0000000, float 2.310000e+02, float 0x404E1999A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp915 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp916 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp917 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5661,7 +5661,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9745 = insertelement <4 x float> undef, float %add9744, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9745, <4 x float>* undef, align 16
+  store <4 x float> %vecins9745, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp918 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5673,7 +5673,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9748 = insertelement <4 x float> %tmp919, float %add9747, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9748, <4 x float>* undef, align 16
+  store <4 x float> %vecins9748, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp920 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5685,7 +5685,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9751 = insertelement <4 x float> %tmp921, float %add9750, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9751, <4 x float>* undef, align 16
+  store <4 x float> %vecins9751, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp922 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5697,9 +5697,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9754 = insertelement <4 x float> %tmp923, float %add9753, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9754, <4 x float>* undef, align 16
+  store <4 x float> %vecins9754, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 2.590000e+02, float 0x407B7199A0000000, float 0xC07ED199A0000000, float 0xC064FCCCC0000000>, <4 x float>* %.compoundliteral9755
+  store <4 x float> <float 2.590000e+02, float 0x407B7199A0000000, float 0xC07ED199A0000000, float 0xC064FCCCC0000000>, <4 x float>* %.compoundliteral9755
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp924 = load <4 x float>, <4 x float>* %.compoundliteral9755
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5717,7 +5717,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9759 = insertelement <4 x float> %tmp927, float %add9758, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9759, <4 x float>* undef, align 16
+  store <4 x float> %vecins9759, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp928 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5729,17 +5729,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9762 = insertelement <4 x float> %tmp929, float %add9761, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9762, <4 x float>* undef, align 16
+  store <4 x float> %vecins9762, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp930 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add9764 = fadd float %val, 0xC060E66660000000
+  %add9764 = fadd float undef, 0xC060E66660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp931 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9765 = insertelement <4 x float> %tmp931, float %add9764, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9765, <4 x float>* undef, align 16
+  store <4 x float> %vecins9765, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp932 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5751,9 +5751,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9768 = insertelement <4 x float> %tmp933, float %add9767, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9768, <4 x float>* undef, align 16
+  store <4 x float> %vecins9768, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4032CCCCC0000000, float -9.600000e+01, float -5.000000e+02, float 0x4078EE6660000000>, <4 x float>* %.compoundliteral9769
+  store <4 x float> <float 0x4032CCCCC0000000, float -9.600000e+01, float -5.000000e+02, float 0x4078EE6660000000>, <4 x float>* %.compoundliteral9769
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp934 = load <4 x float>, <4 x float>* %.compoundliteral9769
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5761,7 +5761,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add9770 = fadd <4 x float> %tmp935, %tmp934
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add9770, <4 x float>* undef, align 16
+  store <4 x float> %add9770, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp936 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5773,7 +5773,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9773 = insertelement <4 x float> %tmp937, float %add9772, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9773, <4 x float>* undef, align 16
+  store <4 x float> %vecins9773, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp938 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5785,25 +5785,25 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins9776 = insertelement <4 x float> %tmp939, float %add9775, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins9776, <4 x float>* undef, align 16
+  store <4 x float> %vecins9776, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext9816 = extractelement <4 x float> undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp940 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %vecins9818 = insertelement <4 x float> %tmp940, float %val, i32 1
+  %vecins9818 = insertelement <4 x float> %tmp940, float undef, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp941 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add10388 = fadd float %val, 4.755000e+02
+  %add10388 = fadd float undef, 4.755000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp942 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10389 = insertelement <4 x float> %tmp942, float %add10388, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10389, <4 x float>* undef, align 16
+  store <4 x float> %vecins10389, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp943 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5815,19 +5815,19 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10392 = insertelement <4 x float> %tmp944, float %add10391, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10392, <4 x float>* undef, align 16
+  store <4 x float> %vecins10392, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp945 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp946 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add10405 = fadd float %val, -5.650000e+01
+  %add10405 = fadd float undef, -5.650000e+01
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp947 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10406 = insertelement <4 x float> %tmp947, float %add10405, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10406, <4 x float>* undef, align 16
+  store <4 x float> %vecins10406, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp948 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5839,7 +5839,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10409 = insertelement <4 x float> %tmp949, float %add10408, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10409, <4 x float>* undef, align 16
+  store <4 x float> %vecins10409, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp950 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5849,9 +5849,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp951 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float -2.340000e+02, float -4.720000e+02, float 4.350000e+02, float 0xC059A66660000000>, <4 x float>* %.compoundliteral10413
+  store <4 x float> <float -2.340000e+02, float -4.720000e+02, float 4.350000e+02, float 0xC059A66660000000>, <4 x float>* %.compoundliteral10413
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp952 = load <4 x float>, <4 x float>* %.compoundliteral10413
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5859,7 +5859,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add10414 = fadd <4 x float> %tmp953, %tmp952
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add10414, <4 x float>* undef, align 16
+  store <4 x float> %add10414, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp954 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5871,7 +5871,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10417 = insertelement <4 x float> %tmp955, float %add10416, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10417, <4 x float>* undef, align 16
+  store <4 x float> %vecins10417, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp956 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5883,15 +5883,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10420 = insertelement <4 x float> %tmp957, float %add10419, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10420, <4 x float>* undef, align 16
+  store <4 x float> %vecins10420, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add10422 = fadd float %val, 0xC0662CCCC0000000
+  %add10422 = fadd float undef, 0xC0662CCCC0000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext10424 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x402B333340000000, float 0x40735E6660000000, float 0xC0567999A0000000, float 2.050000e+02>, <4 x float>* undef
+  store <4 x float> <float 0x402B333340000000, float 0x40735E6660000000, float 0xC0567999A0000000, float 2.050000e+02>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp958 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5899,7 +5899,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add10428 = fadd <4 x float> %tmp959, %tmp958
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add10428, <4 x float>* undef, align 16
+  store <4 x float> %add10428, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp960 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5909,13 +5909,13 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp961 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add10436 = fadd float %val, 0xC06AF33340000000
+  %add10436 = fadd float undef, 0xC06AF33340000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp962 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10437 = insertelement <4 x float> %tmp962, float %add10436, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10437, <4 x float>* undef, align 16
+  store <4 x float> %vecins10437, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecext10438 = extractelement <4 x float> undef, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5925,9 +5925,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10440 = insertelement <4 x float> %tmp963, float %add10439, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10440, <4 x float>* undef, align 16
+  store <4 x float> %vecins10440, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC065E999A0000000, float 0x4067D33340000000, float 0xC070133340000000, float 0x406B666660000000>, <4 x float>* undef
+  store <4 x float> <float 0xC065E999A0000000, float 0x4067D33340000000, float 0xC070133340000000, float 0x406B666660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp964 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5941,7 +5941,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10445 = insertelement <4 x float> %tmp966, float %add10444, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10445, <4 x float>* undef, align 16
+  store <4 x float> %vecins10445, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp967 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5953,7 +5953,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10448 = insertelement <4 x float> %tmp968, float %add10447, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10448, <4 x float>* undef, align 16
+  store <4 x float> %vecins10448, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp969 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5965,7 +5965,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10451 = insertelement <4 x float> %tmp970, float %add10450, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10451, <4 x float>* undef, align 16
+  store <4 x float> %vecins10451, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp971 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5975,7 +5975,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10454 = insertelement <4 x float> undef, float %add10453, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x406AFCCCC0000000, float 0xC07604CCC0000000, float 6.900000e+01, float 0xC060A66660000000>, <4 x float>* undef
+  store <4 x float> <float 0x406AFCCCC0000000, float 0xC07604CCC0000000, float 6.900000e+01, float 0xC060A66660000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp972 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5983,7 +5983,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %add10456 = fadd <4 x float> %tmp973, %tmp972
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %add10456, <4 x float>* undef, align 16
+  store <4 x float> %add10456, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp974 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -5993,7 +5993,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10459 = insertelement <4 x float> undef, float %add10458, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10459, <4 x float>* undef, align 16
+  store <4 x float> %vecins10459, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp975 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6015,7 +6015,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10465 = insertelement <4 x float> %tmp978, float %add10464, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10465, <4 x float>* undef, align 16
+  store <4 x float> %vecins10465, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp979 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6027,9 +6027,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10468 = insertelement <4 x float> %tmp980, float %add10467, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10468, <4 x float>* undef, align 16
+  store <4 x float> %vecins10468, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x4078833340000000, float 0x40786CCCC0000000, float 0xC0468CCCC0000000, float 0xC0793199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x4078833340000000, float 0x40786CCCC0000000, float 0xC0468CCCC0000000, float 0xC0793199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp981 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6045,7 +6045,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10473 = insertelement <4 x float> %tmp983, float %add10472, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10473, <4 x float>* undef, align 16
+  store <4 x float> %vecins10473, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp984 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6057,15 +6057,15 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10476 = insertelement <4 x float> %tmp985, float %add10475, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10476, <4 x float>* undef, align 16
+  store <4 x float> %vecins10476, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add10489 = fadd float %val, 0x4074666660000000
+  %add10489 = fadd float undef, 0x4074666660000000
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp986 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10490 = insertelement <4 x float> %tmp986, float %add10489, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10490, <4 x float>* undef, align 16
+  store <4 x float> %vecins10490, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp987 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6079,9 +6079,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10510 = insertelement <4 x float> %tmp989, float %add10509, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10510, <4 x float>* undef, align 16
+  store <4 x float> %vecins10510, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0x40656999A0000000, float 0xC073766660000000, float 1.685000e+02, float 0x40765199A0000000>, <4 x float>* undef
+  store <4 x float> <float 0x40656999A0000000, float 0xC073766660000000, float 1.685000e+02, float 0x40765199A0000000>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp990 = load <4 x float>, <4 x float>* undef
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6097,17 +6097,17 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10515 = insertelement <4 x float> %tmp992, float %add10514, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10515, <4 x float>* undef, align 16
+  store <4 x float> %vecins10515, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp993 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  %add10562 = fadd float %val, 2.035000e+02
+  %add10562 = fadd float undef, 2.035000e+02
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp994 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10563 = insertelement <4 x float> %tmp994, float %add10562, i32 2
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10563, <4 x float>* undef, align 16
+  store <4 x float> %vecins10563, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp995 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6119,9 +6119,9 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10566 = insertelement <4 x float> %tmp996, float %add10565, i32 3
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10566, <4 x float>* undef, align 16
+  store <4 x float> %vecins10566, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> <float 0xC068B999A0000000, float 0xC050E66660000000, float 0xC0725999A0000000, float 0xC054D33340000000>, <4 x float>* %.compoundliteral10567
+  store <4 x float> <float 0xC068B999A0000000, float 0xC050E66660000000, float 0xC0725999A0000000, float 0xC054D33340000000>, <4 x float>* %.compoundliteral10567
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp997 = load <4 x float>, <4 x float>* %.compoundliteral10567
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6139,7 +6139,7 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10571 = insertelement <4 x float> %tmp1000, float %add10570, i32 0
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10571, <4 x float>* undef, align 16
+  store <4 x float> %vecins10571, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %tmp1001 = load <4 x float>, <4 x float>* undef, align 16
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
@@ -6151,56 +6151,56 @@ entry:
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
   %vecins10574 = insertelement <4 x float> %tmp1002, float %add10573, i32 1
   tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
-  store volatile <4 x float> %vecins10574, <4 x float>* undef, align 16
+  store <4 x float> %vecins10574, <4 x float>* undef, align 16
   %tmp1003 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10575 = extractelement <4 x float> %tmp1003, i32 2
   %tmp1004 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins10577 = insertelement <4 x float> %tmp1004, float %val, i32 2
-  store volatile <4 x float> %vecins10577, <4 x float>* undef, align 16
+  %vecins10577 = insertelement <4 x float> %tmp1004, float undef, i32 2
+  store <4 x float> %vecins10577, <4 x float>* undef, align 16
   %tmp1005 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10578 = extractelement <4 x float> %tmp1005, i32 3
   %add10579 = fadd float %vecext10578, 0x4076566660000000
   %tmp1006 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10580 = insertelement <4 x float> %tmp1006, float %add10579, i32 3
-  store volatile <4 x float> %vecins10580, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407CAB3340000000, float 1.685000e+02, float 0xC07B866660000000, float 0xC061ACCCC0000000>, <4 x float>* %.compoundliteral10581
+  store <4 x float> %vecins10580, <4 x float>* undef, align 16
+  store <4 x float> <float 0x407CAB3340000000, float 1.685000e+02, float 0xC07B866660000000, float 0xC061ACCCC0000000>, <4 x float>* %.compoundliteral10581
   %tmp1007 = load <4 x float>, <4 x float>* %.compoundliteral10581
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1008 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10583 = extractelement <4 x float> %tmp1008, i32 0
   %add10584 = fadd float %vecext10583, 0xC060533340000000
   %tmp1009 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10585 = insertelement <4 x float> %tmp1009, float %add10584, i32 0
-  store volatile <4 x float> %vecins10585, <4 x float>* undef, align 16
+  store <4 x float> %vecins10585, <4 x float>* undef, align 16
   %tmp1010 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10586 = extractelement <4 x float> %tmp1010, i32 1
   %add10587 = fadd float %vecext10586, 0xC0694CCCC0000000
   %tmp1011 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10588 = insertelement <4 x float> %tmp1011, float %add10587, i32 1
-  store volatile <4 x float> %vecins10588, <4 x float>* undef, align 16
+  store <4 x float> %vecins10588, <4 x float>* undef, align 16
   %tmp1012 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10589 = extractelement <4 x float> %tmp1012, i32 2
   %add10590 = fadd float %vecext10589, 0xC0541999A0000000
   %tmp1013 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10591 = insertelement <4 x float> %tmp1013, float %add10590, i32 2
-  store volatile <4 x float> %vecins10591, <4 x float>* undef, align 16
+  store <4 x float> %vecins10591, <4 x float>* undef, align 16
   %tmp1014 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10592 = extractelement <4 x float> %tmp1014, i32 3
   %add10593 = fadd float %vecext10592, 0xC06C566660000000
   %tmp1015 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10594 = insertelement <4 x float> %tmp1015, float %add10593, i32 3
-  store volatile <4 x float> %vecins10594, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407A3199A0000000, float 0xC0659999A0000000, float 0x407E0999A0000000, float 0xC0334CCCC0000000>, <4 x float>* %.compoundliteral10595
+  store <4 x float> %vecins10594, <4 x float>* undef, align 16
+  store <4 x float> <float 0x407A3199A0000000, float 0xC0659999A0000000, float 0x407E0999A0000000, float 0xC0334CCCC0000000>, <4 x float>* %.compoundliteral10595
   %tmp1016 = load <4 x float>, <4 x float>* %.compoundliteral10595
   %tmp1017 = load <4 x float>, <4 x float>* undef, align 16
   %add10596 = fadd <4 x float> %tmp1017, %tmp1016
-  store volatile <4 x float> %add10596, <4 x float>* undef, align 16
+  store <4 x float> %add10596, <4 x float>* undef, align 16
   %tmp1018 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10597 = extractelement <4 x float> %tmp1018, i32 0
   %add10598 = fadd float %vecext10597, 0x40640999A0000000
   %tmp1019 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10599 = insertelement <4 x float> %tmp1019, float %add10598, i32 0
-  store volatile <4 x float> %vecins10599, <4 x float>* undef, align 16
+  store <4 x float> %vecins10599, <4 x float>* undef, align 16
   %tmp1020 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10600 = extractelement <4 x float> %tmp1020, i32 1
   %add10601 = fadd float %vecext10600, 0xC073966660000000
@@ -6211,48 +6211,48 @@ entry:
   %add10604 = fadd float %vecext10603, 1.780000e+02
   %tmp1023 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10605 = insertelement <4 x float> %tmp1023, float %add10604, i32 2
-  store volatile <4 x float> %vecins10605, <4 x float>* undef, align 16
+  store <4 x float> %vecins10605, <4 x float>* undef, align 16
   %tmp1024 = load <4 x float>, <4 x float>* undef, align 16
-  %add10607 = fadd float %val, 0x4070A33340000000
+  %add10607 = fadd float undef, 0x4070A33340000000
   %tmp1025 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407C5999A0000000, float 0x4046733340000000, float 0xC06E6CCCC0000000, float 0xC063C33340000000>, <4 x float>* %.compoundliteral10609
+  store <4 x float> <float 0x407C5999A0000000, float 0x4046733340000000, float 0xC06E6CCCC0000000, float 0xC063C33340000000>, <4 x float>* %.compoundliteral10609
   %tmp1026 = load <4 x float>, <4 x float>* %.compoundliteral10609
   %tmp1027 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1028 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10611 = extractelement <4 x float> %tmp1028, i32 0
   %add10612 = fadd float %vecext10611, 0x40757199A0000000
   %vecins10613 = insertelement <4 x float> undef, float %add10612, i32 0
-  store volatile <4 x float> %vecins10613, <4 x float>* undef, align 16
+  store <4 x float> %vecins10613, <4 x float>* undef, align 16
   %tmp1029 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10614 = extractelement <4 x float> %tmp1029, i32 1
   %add10615 = fadd float %vecext10614, 0x40740CCCC0000000
   %tmp1030 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10616 = insertelement <4 x float> %tmp1030, float %add10615, i32 1
-  store volatile <4 x float> %vecins10616, <4 x float>* undef, align 16
+  store <4 x float> %vecins10616, <4 x float>* undef, align 16
   %tmp1031 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10617 = extractelement <4 x float> %tmp1031, i32 2
   %add10618 = fadd float %vecext10617, 0xC012CCCCC0000000
   %tmp1032 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10619 = insertelement <4 x float> %tmp1032, float %add10618, i32 2
-  store volatile <4 x float> %vecins10619, <4 x float>* undef, align 16
+  store <4 x float> %vecins10619, <4 x float>* undef, align 16
   %tmp1033 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10620 = extractelement <4 x float> %tmp1033, i32 3
   %add10621 = fadd float %vecext10620, 0x406E566660000000
   %tmp1034 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407B2199A0000000, float 0xC07D9CCCC0000000, float -4.350000e+01, float 0xC07D3B3340000000>, <4 x float>* %.compoundliteral10623
+  store <4 x float> <float 0x407B2199A0000000, float 0xC07D9CCCC0000000, float -4.350000e+01, float 0xC07D3B3340000000>, <4 x float>* %.compoundliteral10623
   %tmp1035 = load <4 x float>, <4 x float>* %.compoundliteral10623
   %add10624 = fadd <4 x float> undef, %tmp1035
   %tmp1036 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10625 = extractelement <4 x float> %tmp1036, i32 0
   %tmp1037 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins10627 = insertelement <4 x float> %tmp1037, float %val, i32 0
-  store volatile <4 x float> %vecins10627, <4 x float>* undef, align 16
+  %vecins10627 = insertelement <4 x float> %tmp1037, float undef, i32 0
+  store <4 x float> %vecins10627, <4 x float>* undef, align 16
   %tmp1038 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10628 = extractelement <4 x float> %tmp1038, i32 1
   %add10629 = fadd float %vecext10628, 0x407E3CCCC0000000
   %tmp1039 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10630 = insertelement <4 x float> %tmp1039, float %add10629, i32 1
-  store volatile <4 x float> %vecins10630, <4 x float>* undef, align 16
+  store <4 x float> %vecins10630, <4 x float>* undef, align 16
   %tmp1040 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10631 = extractelement <4 x float> %tmp1040, i32 2
   %tmp1041 = load <4 x float>, <4 x float>* undef, align 16
@@ -6261,8 +6261,8 @@ entry:
   %add10635 = fadd float %vecext10634, 0xC067533340000000
   %tmp1043 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10636 = insertelement <4 x float> %tmp1043, float %add10635, i32 3
-  store volatile <4 x float> %vecins10636, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 1.950000e+02, float 0x407E8E6660000000, float 0x407D7CCCC0000000, float 0x407E166660000000>, <4 x float>* %.compoundliteral10637
+  store <4 x float> %vecins10636, <4 x float>* undef, align 16
+  store <4 x float> <float 1.950000e+02, float 0x407E8E6660000000, float 0x407D7CCCC0000000, float 0x407E166660000000>, <4 x float>* %.compoundliteral10637
   %tmp1044 = load <4 x float>, <4 x float>* undef, align 16
   %add10638 = fadd <4 x float> %tmp1044, undef
   %tmp1045 = load <4 x float>, <4 x float>* undef, align 16
@@ -6270,94 +6270,94 @@ entry:
   %add10640 = fadd float %vecext10639, 0x406CA33340000000
   %tmp1046 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10641 = insertelement <4 x float> %tmp1046, float %add10640, i32 0
-  store volatile <4 x float> %vecins10641, <4 x float>* undef, align 16
+  store <4 x float> %vecins10641, <4 x float>* undef, align 16
   %tmp1047 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10642 = extractelement <4 x float> %tmp1047, i32 1
   %add10643 = fadd float %vecext10642, 0xC07C8999A0000000
   %tmp1048 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10644 = insertelement <4 x float> %tmp1048, float %add10643, i32 1
-  store volatile <4 x float> %vecins10644, <4 x float>* undef, align 16
+  store <4 x float> %vecins10644, <4 x float>* undef, align 16
   %tmp1049 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10645 = extractelement <4 x float> %tmp1049, i32 2
   %tmp1050 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1051 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins10748 = insertelement <4 x float> undef, float %val, i32 3
+  %vecins10748 = insertelement <4 x float> undef, float undef, i32 3
   %tmp1052 = load <4 x float>, <4 x float>* %.compoundliteral10749
   %add10750 = fadd <4 x float> undef, %tmp1052
-  store volatile <4 x float> %add10750, <4 x float>* undef, align 16
+  store <4 x float> %add10750, <4 x float>* undef, align 16
   %tmp1053 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10751 = extractelement <4 x float> %tmp1053, i32 0
   %add10752 = fadd float %vecext10751, 0x4071B33340000000
   %tmp1054 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10753 = insertelement <4 x float> %tmp1054, float %add10752, i32 0
-  store volatile <4 x float> %vecins10753, <4 x float>* undef, align 16
+  store <4 x float> %vecins10753, <4 x float>* undef, align 16
   %tmp1055 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10754 = extractelement <4 x float> %tmp1055, i32 1
   %add10755 = fadd float %vecext10754, 0xC076A66660000000
   %tmp1056 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10756 = insertelement <4 x float> %tmp1056, float %add10755, i32 1
-  store volatile <4 x float> %vecins10756, <4 x float>* undef, align 16
+  store <4 x float> %vecins10756, <4 x float>* undef, align 16
   %tmp1057 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10757 = extractelement <4 x float> %tmp1057, i32 2
   %add10758 = fadd float %vecext10757, 3.800000e+01
   %tmp1058 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10759 = insertelement <4 x float> %tmp1058, float %add10758, i32 2
-  store volatile <4 x float> %vecins10759, <4 x float>* undef, align 16
+  store <4 x float> %vecins10759, <4 x float>* undef, align 16
   %tmp1059 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10760 = extractelement <4 x float> %tmp1059, i32 3
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0xC075BB3340000000, float 0x4074D4CCC0000000, float 0xC07A466660000000, float 0xC0691CCCC0000000>, <4 x float>* %.compoundliteral10763
+  store <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> <float 0xC075BB3340000000, float 0x4074D4CCC0000000, float 0xC07A466660000000, float 0xC0691CCCC0000000>, <4 x float>* %.compoundliteral10763
   %tmp1060 = load <4 x float>, <4 x float>* %.compoundliteral10763
   %tmp1061 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1062 = load <4 x float>, <4 x float>* undef, align 16
-  %add10985 = fadd float %val, 0x405E933340000000
+  %add10985 = fadd float undef, 0x405E933340000000
   %tmp1063 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10986 = insertelement <4 x float> %tmp1063, float %add10985, i32 3
-  store volatile <4 x float> %vecins10986, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0xC0721E6660000000, float -4.180000e+02, float 0x406F366660000000, float 0xC055F999A0000000>, <4 x float>* %.compoundliteral10987
+  store <4 x float> %vecins10986, <4 x float>* undef, align 16
+  store <4 x float> <float 0xC0721E6660000000, float -4.180000e+02, float 0x406F366660000000, float 0xC055F999A0000000>, <4 x float>* %.compoundliteral10987
   %tmp1064 = load <4 x float>, <4 x float>* %.compoundliteral10987
   %tmp1065 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins10994 = insertelement <4 x float> %tmp1065, float %val, i32 1
+  %vecins10994 = insertelement <4 x float> %tmp1065, float undef, i32 1
   %tmp1066 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10995 = extractelement <4 x float> %tmp1066, i32 2
   %add10996 = fadd float %vecext10995, 0x406F9999A0000000
   %tmp1067 = load <4 x float>, <4 x float>* undef, align 16
   %vecins10997 = insertelement <4 x float> %tmp1067, float %add10996, i32 2
-  store volatile <4 x float> %vecins10997, <4 x float>* undef, align 16
+  store <4 x float> %vecins10997, <4 x float>* undef, align 16
   %tmp1068 = load <4 x float>, <4 x float>* undef, align 16
   %vecext10998 = extractelement <4 x float> %tmp1068, i32 3
   %add10999 = fadd float %vecext10998, -2.765000e+02
   %tmp1069 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11000 = insertelement <4 x float> %tmp1069, float %add10999, i32 3
-  store volatile <4 x float> %vecins11000, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x4078F999A0000000, float 0xC06D166660000000, float 0x40501999A0000000, float 0x406FC999A0000000>, <4 x float>* %.compoundliteral11001
+  store <4 x float> %vecins11000, <4 x float>* undef, align 16
+  store <4 x float> <float 0x4078F999A0000000, float 0xC06D166660000000, float 0x40501999A0000000, float 0x406FC999A0000000>, <4 x float>* %.compoundliteral11001
   %tmp1070 = load <4 x float>, <4 x float>* undef, align 16
   %add11002 = fadd <4 x float> %tmp1070, undef
   %vecext11003 = extractelement <4 x float> undef, i32 0
   %vecext11009 = extractelement <4 x float> undef, i32 2
   %tmp1071 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11033 = insertelement <4 x float> %tmp1071, float %val, i32 0
-  store volatile <4 x float> %vecins11033, <4 x float>* undef, align 16
+  %vecins11033 = insertelement <4 x float> %tmp1071, float undef, i32 0
+  store <4 x float> %vecins11033, <4 x float>* undef, align 16
   %tmp1072 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11034 = extractelement <4 x float> %tmp1072, i32 1
   %add11035 = fadd float %vecext11034, 0x4056D33340000000
   %tmp1073 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11036 = insertelement <4 x float> %tmp1073, float %add11035, i32 1
-  store volatile <4 x float> %vecins11036, <4 x float>* undef, align 16
+  store <4 x float> %vecins11036, <4 x float>* undef, align 16
   %tmp1074 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11037 = extractelement <4 x float> %tmp1074, i32 2
   %add11038 = fadd float %vecext11037, 0xC06EA33340000000
   %tmp1075 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1076 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11040 = extractelement <4 x float> %tmp1076, i32 3
   %add11041 = fadd float %vecext11040, 0x40746CCCC0000000
   %tmp1077 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11042 = insertelement <4 x float> %tmp1077, float %add11041, i32 3
-  store volatile <4 x float> <float 0x405DD999A0000000, float -3.775000e+02, float -1.265000e+02, float 0xC065C66660000000>, <4 x float>* undef
+  store <4 x float> <float 0x405DD999A0000000, float -3.775000e+02, float -1.265000e+02, float 0xC065C66660000000>, <4 x float>* undef
   %tmp1078 = load <4 x float>, <4 x float>* undef, align 16
   %add11044 = fadd <4 x float> %tmp1078, undef
-  store volatile <4 x float> %add11044, <4 x float>* undef, align 16
+  store <4 x float> %add11044, <4 x float>* undef, align 16
   %tmp1079 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11045 = extractelement <4 x float> %tmp1079, i32 0
   %add11046 = fadd float %vecext11045, 0xC076E66660000000
@@ -6366,58 +6366,58 @@ entry:
   %tmp1081 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11048 = extractelement <4 x float> %tmp1081, i32 1
   %add11049 = fadd float %vecext11048, 4.100000e+02
-  %vecins11064 = insertelement <4 x float> undef, float %val, i32 1
-  %add11074 = fadd float %val, 0xC06FF999A0000000
+  %vecins11064 = insertelement <4 x float> undef, float undef, i32 1
+  %add11074 = fadd float undef, 0xC06FF999A0000000
   %tmp1082 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11075 = insertelement <4 x float> %tmp1082, float %add11074, i32 0
-  store volatile <4 x float> %vecins11075, <4 x float>* undef, align 16
-  %add11077 = fadd float %val, 0xC075D33340000000
+  store <4 x float> %vecins11075, <4 x float>* undef, align 16
+  %add11077 = fadd float undef, 0xC075D33340000000
   %tmp1083 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1084 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1085 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11093 = extractelement <4 x float> %tmp1085, i32 2
   %add11094 = fadd float %vecext11093, 0xC07CD66660000000
   %tmp1086 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11095 = insertelement <4 x float> %tmp1086, float %add11094, i32 2
-  store volatile <4 x float> %vecins11095, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x4061F66660000000, float 0xC076DB3340000000, float 0xC055A66660000000, float 2.415000e+02>, <4 x float>* undef
+  store <4 x float> %vecins11095, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> <float 0x4061F66660000000, float 0xC076DB3340000000, float 0xC055A66660000000, float 2.415000e+02>, <4 x float>* undef
   %tmp1087 = load <4 x float>, <4 x float>* undef
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1088 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11513 = extractelement <4 x float> %tmp1088, i32 2
   %add11514 = fadd float %vecext11513, 0xC07C7199A0000000
   %vecins11515 = insertelement <4 x float> undef, float %add11514, i32 2
-  store volatile <4 x float> %vecins11515, <4 x float>* undef, align 16
+  store <4 x float> %vecins11515, <4 x float>* undef, align 16
   %add11520 = fadd <4 x float> undef, undef
-  store volatile <4 x float> %add11520, <4 x float>* undef, align 16
+  store <4 x float> %add11520, <4 x float>* undef, align 16
   %vecext11521 = extractelement <4 x float> undef, i32 0
   %add11522 = fadd float %vecext11521, 0x4041733340000000
   %tmp1089 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1090 = load <4 x float>, <4 x float>* undef
   %tmp1091 = load <4 x float>, <4 x float>* undef, align 16
   %add11562 = fadd <4 x float> %tmp1091, %tmp1090
   %tmp1092 = load <4 x float>, <4 x float>* undef, align 16
-  %add11564 = fadd float %val, 0xC0411999A0000000
+  %add11564 = fadd float undef, 0xC0411999A0000000
   %tmp1093 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11565 = insertelement <4 x float> %tmp1093, float %add11564, i32 0
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %vecext11586 = extractelement <4 x float> undef, i32 3
   %add11587 = fadd float %vecext11586, 3.760000e+02
   %tmp1094 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0xC06ED999A0000000, float 1.380000e+02, float 0xC073AB3340000000, float 0x4078A66660000000>, <4 x float>* undef
+  store <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> <float 0xC06ED999A0000000, float 1.380000e+02, float 0xC073AB3340000000, float 0x4078A66660000000>, <4 x float>* undef
   %tmp1095 = load <4 x float>, <4 x float>* undef
   %tmp1096 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1097 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1098 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11593 = insertelement <4 x float> %tmp1098, float %val, i32 0
+  %vecins11593 = insertelement <4 x float> %tmp1098, float undef, i32 0
   %vecext11594 = extractelement <4 x float> undef, i32 1
   %tmp1099 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11596 = insertelement <4 x float> %tmp1099, float %val, i32 1
-  store volatile <4 x float> %vecins11596, <4 x float>* undef, align 16
+  %vecins11596 = insertelement <4 x float> %tmp1099, float undef, i32 1
+  store <4 x float> %vecins11596, <4 x float>* undef, align 16
   %tmp1100 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11597 = extractelement <4 x float> %tmp1100, i32 2
   %add11598 = fadd float %vecext11597, 0x40430CCCC0000000
@@ -6426,34 +6426,34 @@ entry:
   %tmp1102 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11600 = extractelement <4 x float> %tmp1102, i32 3
   %tmp1103 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11602 = insertelement <4 x float> %tmp1103, float %val, i32 3
-  store volatile <4 x float> %vecins11602, <4 x float>* undef, align 16
+  %vecins11602 = insertelement <4 x float> %tmp1103, float undef, i32 3
+  store <4 x float> %vecins11602, <4 x float>* undef, align 16
   %tmp1104 = load <4 x float>, <4 x float>* undef
   %tmp1105 = load <4 x float>, <4 x float>* undef, align 16
   %add11604 = fadd <4 x float> %tmp1105, %tmp1104
   %tmp1106 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11605 = extractelement <4 x float> %tmp1106, i32 0
   %tmp1107 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11607 = insertelement <4 x float> %tmp1107, float %val, i32 0
-  %vecins11621 = insertelement <4 x float> undef, float %val, i32 0
-  %vecins11630 = insertelement <4 x float> undef, float %val, i32 3
-  store volatile <4 x float> %vecins11630, <4 x float>* undef, align 16
-  store volatile <4 x float> <float -1.190000e+02, float 0x402F666660000000, float 0xC07BD33340000000, float -1.595000e+02>, <4 x float>* %.compoundliteral11631
+  %vecins11607 = insertelement <4 x float> %tmp1107, float undef, i32 0
+  %vecins11621 = insertelement <4 x float> undef, float undef, i32 0
+  %vecins11630 = insertelement <4 x float> undef, float undef, i32 3
+  store <4 x float> %vecins11630, <4 x float>* undef, align 16
+  store <4 x float> <float -1.190000e+02, float 0x402F666660000000, float 0xC07BD33340000000, float -1.595000e+02>, <4 x float>* %.compoundliteral11631
   %tmp1108 = load <4 x float>, <4 x float>* %.compoundliteral11631
   %tmp1109 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  %add11634 = fadd float %val, -1.075000e+02
+  store <4 x float> undef, <4 x float>* undef, align 16
+  %add11634 = fadd float undef, -1.075000e+02
   %vecext11647 = extractelement <4 x float> undef, i32 0
   %add11648 = fadd float %vecext11647, 0x40775999A0000000
   %tmp1110 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11650 = extractelement <4 x float> undef, i32 1
   %tmp1111 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11784 = insertelement <4 x float> %tmp1111, float %val, i32 3
-  store volatile <4 x float> %vecins11784, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 1.605000e+02, float 0x4068366660000000, float 2.820000e+02, float 0x407CF66660000000>, <4 x float>* %.compoundliteral11785
+  %vecins11784 = insertelement <4 x float> %tmp1111, float undef, i32 3
+  store <4 x float> %vecins11784, <4 x float>* undef, align 16
+  store <4 x float> <float 1.605000e+02, float 0x4068366660000000, float 2.820000e+02, float 0x407CF66660000000>, <4 x float>* %.compoundliteral11785
   %tmp1112 = load <4 x float>, <4 x float>* %.compoundliteral11785
   %add11786 = fadd <4 x float> undef, %tmp1112
-  store volatile <4 x float> %add11786, <4 x float>* undef, align 16
+  store <4 x float> %add11786, <4 x float>* undef, align 16
   %tmp1113 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11787 = extractelement <4 x float> %tmp1113, i32 0
   %vecext11807 = extractelement <4 x float> undef, i32 2
@@ -6463,60 +6463,60 @@ entry:
   %add11811 = fadd float %vecext11810, 0x4068F66660000000
   %tmp1115 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11812 = insertelement <4 x float> %tmp1115, float %add11811, i32 3
-  store volatile <4 x float> %vecins11812, <4 x float>* undef, align 16
+  store <4 x float> %vecins11812, <4 x float>* undef, align 16
   %tmp1116 = load <4 x float>, <4 x float>* undef
   %tmp1117 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11958 = extractelement <4 x float> undef, i32 1
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %vecext11961 = extractelement <4 x float> undef, i32 2
   %add11962 = fadd float %vecext11961, -3.680000e+02
   %tmp1118 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  %add11965 = fadd float %val, 0x4061133340000000
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
+  %add11965 = fadd float undef, 0x4061133340000000
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1119 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11975 = extractelement <4 x float> %tmp1119, i32 2
   %tmp1120 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins11977 = insertelement <4 x float> %tmp1120, float %val, i32 2
-  store volatile <4 x float> %vecins11977, <4 x float>* undef, align 16
+  %vecins11977 = insertelement <4 x float> %tmp1120, float undef, i32 2
+  store <4 x float> %vecins11977, <4 x float>* undef, align 16
   %vecext11978 = extractelement <4 x float> undef, i32 3
   %add11979 = fadd float %vecext11978, 0xC0688999A0000000
   %tmp1121 = load <4 x float>, <4 x float>* undef, align 16
   %vecins11980 = insertelement <4 x float> %tmp1121, float %add11979, i32 3
-  store volatile <4 x float> %vecins11980, <4 x float>* undef, align 16
+  store <4 x float> %vecins11980, <4 x float>* undef, align 16
   %add11982 = fadd <4 x float> undef, undef
-  store volatile <4 x float> %add11982, <4 x float>* undef, align 16
+  store <4 x float> %add11982, <4 x float>* undef, align 16
   %tmp1122 = load <4 x float>, <4 x float>* undef, align 16
   %vecext11983 = extractelement <4 x float> %tmp1122, i32 0
   %add11984 = fadd float %vecext11983, 0xC075966660000000
   %tmp1123 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins12005 = insertelement <4 x float> undef, float %val, i32 2
-  store volatile <4 x float> %vecins12005, <4 x float>* undef, align 16
+  %vecins12005 = insertelement <4 x float> undef, float undef, i32 2
+  store <4 x float> %vecins12005, <4 x float>* undef, align 16
   %tmp1124 = load <4 x float>, <4 x float>* undef, align 16
-  %add12007 = fadd float %val, 0xC07124CCC0000000
+  %add12007 = fadd float undef, 0xC07124CCC0000000
   %vecins12008 = insertelement <4 x float> undef, float %add12007, i32 3
-  store volatile <4 x float> %vecins12008, <4 x float>* undef, align 16
+  store <4 x float> %vecins12008, <4 x float>* undef, align 16
   %tmp1125 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1126 = load <4 x float>, <4 x float>* undef, align 16
-  %add12012 = fadd float %val, 0xC0750CCCC0000000
+  %add12012 = fadd float undef, 0xC0750CCCC0000000
   %tmp1127 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12013 = insertelement <4 x float> %tmp1127, float %add12012, i32 0
-  store volatile <4 x float> %vecins12013, <4 x float>* undef, align 16
+  store <4 x float> %vecins12013, <4 x float>* undef, align 16
   %tmp1128 = load <4 x float>, <4 x float>* undef, align 16
-  %add12015 = fadd float %val, 0x4079CE6660000000
+  %add12015 = fadd float undef, 0x4079CE6660000000
   %tmp1129 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12016 = insertelement <4 x float> %tmp1129, float %add12015, i32 1
-  store volatile <4 x float> %vecins12016, <4 x float>* undef, align 16
-  %add12018 = fadd float %val, 3.555000e+02
+  store <4 x float> %vecins12016, <4 x float>* undef, align 16
+  %add12018 = fadd float undef, 3.555000e+02
   %tmp1130 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12019 = insertelement <4 x float> %tmp1130, float %add12018, i32 2
   %tmp1131 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12020 = extractelement <4 x float> %tmp1131, i32 3
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %vecext12028 = extractelement <4 x float> undef, i32 1
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x40791999A0000000, float 0x407C7CCCC0000000, float 0x4070F33340000000, float 0xC056ECCCC0000000>, <4 x float>* undef
+  store <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> <float 0x40791999A0000000, float 0x407C7CCCC0000000, float 0x4070F33340000000, float 0xC056ECCCC0000000>, <4 x float>* undef
   %tmp1132 = load <4 x float>, <4 x float>* undef, align 16
   %add12038 = fadd <4 x float> %tmp1132, undef
   %tmp1133 = load <4 x float>, <4 x float>* undef, align 16
@@ -6524,27 +6524,27 @@ entry:
   %add12043 = fadd float %vecext12042, 0x402F9999A0000000
   %tmp1134 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12044 = insertelement <4 x float> %tmp1134, float %add12043, i32 1
-  store volatile <4 x float> %vecins12044, <4 x float>* undef, align 16
+  store <4 x float> %vecins12044, <4 x float>* undef, align 16
   %vecext12045 = extractelement <4 x float> undef, i32 2
   %add12046 = fadd float %vecext12045, 0xC07EF33340000000
   %tmp1135 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12047 = insertelement <4 x float> %tmp1135, float %add12046, i32 2
-  store volatile <4 x float> %vecins12047, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> %vecins12047, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1136 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12112 = extractelement <4 x float> %tmp1136, i32 1
   %tmp1137 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  %add12116 = fadd float %val, 0xC074F4CCC0000000
+  store <4 x float> undef, <4 x float>* undef, align 16
+  %add12116 = fadd float undef, 0xC074F4CCC0000000
   %tmp1138 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12117 = insertelement <4 x float> %tmp1138, float %add12116, i32 2
-  store volatile <4 x float> %vecins12117, <4 x float>* undef, align 16
+  store <4 x float> %vecins12117, <4 x float>* undef, align 16
   %tmp1139 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12118 = extractelement <4 x float> %tmp1139, i32 3
   %add12119 = fadd float %vecext12118, 0xC0638CCCC0000000
   %tmp1140 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12120 = insertelement <4 x float> %tmp1140, float %add12119, i32 3
-  %add12152 = fadd float %val, 0x4039333340000000
+  %add12152 = fadd float undef, 0x4039333340000000
   %tmp1141 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12153 = insertelement <4 x float> %tmp1141, float %add12152, i32 0
   %vecext12154 = extractelement <4 x float> undef, i32 1
@@ -6561,67 +6561,67 @@ entry:
   %add12161 = fadd float %vecext12160, 0x407B1999A0000000
   %tmp1146 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12162 = insertelement <4 x float> %tmp1146, float %add12161, i32 3
-  store volatile <4 x float> %vecins12162, <4 x float>* undef, align 16
+  store <4 x float> %vecins12162, <4 x float>* undef, align 16
   %tmp1147 = load <4 x float>, <4 x float>* undef
   %tmp1148 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1149 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12182 = extractelement <4 x float> %tmp1149, i32 1
   %tmp1150 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x4061833340000000, float 0x405CA66660000000, float -1.275000e+02, float 0x405BC66660000000>, <4 x float>* undef
-  %add12208 = fadd float %val, 0x407854CCC0000000
+  store <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> <float 0x4061833340000000, float 0x405CA66660000000, float -1.275000e+02, float 0x405BC66660000000>, <4 x float>* undef
+  %add12208 = fadd float undef, 0x407854CCC0000000
   %tmp1151 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1152 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1153 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins12218 = insertelement <4 x float> undef, float %val, i32 3
-  store volatile <4 x float> %vecins12218, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407C3CCCC0000000, float 0xC057C66660000000, float 2.605000e+02, float 0xC07974CCC0000000>, <4 x float>* undef
+  %vecins12218 = insertelement <4 x float> undef, float undef, i32 3
+  store <4 x float> %vecins12218, <4 x float>* undef, align 16
+  store <4 x float> <float 0x407C3CCCC0000000, float 0xC057C66660000000, float 2.605000e+02, float 0xC07974CCC0000000>, <4 x float>* undef
   %tmp1154 = load <4 x float>, <4 x float>* undef
   %tmp1155 = load <4 x float>, <4 x float>* undef, align 16
   %add12220 = fadd <4 x float> %tmp1155, %tmp1154
   %tmp1156 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1157 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins12223 = insertelement <4 x float> %tmp1157, float %val, i32 0
-  store volatile <4 x float> %vecins12223, <4 x float>* undef, align 16
+  %vecins12223 = insertelement <4 x float> %tmp1157, float undef, i32 0
+  store <4 x float> %vecins12223, <4 x float>* undef, align 16
   %tmp1158 = load <4 x float>, <4 x float>* undef, align 16
-  %add12242 = fadd float %val, 0x4067E33340000000
+  %add12242 = fadd float undef, 0x4067E33340000000
   %tmp1159 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12243 = insertelement <4 x float> %tmp1159, float %add12242, i32 2
-  store volatile <4 x float> %vecins12243, <4 x float>* undef, align 16
+  store <4 x float> %vecins12243, <4 x float>* undef, align 16
   %tmp1160 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12244 = extractelement <4 x float> %tmp1160, i32 3
   %add12245 = fadd float %vecext12244, 0x4071AE6660000000
   %tmp1161 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12246 = insertelement <4 x float> %tmp1161, float %add12245, i32 3
-  store volatile <4 x float> %vecins12246, <4 x float>* undef, align 16
-  store volatile <4 x float> <float -4.880000e+02, float 0xC079966660000000, float -8.450000e+01, float 0xC0464CCCC0000000>, <4 x float>* %.compoundliteral12247
+  store <4 x float> %vecins12246, <4 x float>* undef, align 16
+  store <4 x float> <float -4.880000e+02, float 0xC079966660000000, float -8.450000e+01, float 0xC0464CCCC0000000>, <4 x float>* %.compoundliteral12247
   %tmp1162 = load <4 x float>, <4 x float>* %.compoundliteral12247
   %tmp1163 = load <4 x float>, <4 x float>* undef, align 16
   %add12248 = fadd <4 x float> %tmp1163, %tmp1162
-  store volatile <4 x float> %add12248, <4 x float>* undef, align 16
+  store <4 x float> %add12248, <4 x float>* undef, align 16
   %tmp1164 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12249 = extractelement <4 x float> %tmp1164, i32 0
   %add12250 = fadd float %vecext12249, 1.075000e+02
   %tmp1165 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1166 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12252 = extractelement <4 x float> %tmp1166, i32 1
   %add12253 = fadd float %vecext12252, 0xC0662CCCC0000000
   %tmp1167 = load <4 x float>, <4 x float>* undef, align 16
   %vecins12254 = insertelement <4 x float> %tmp1167, float %add12253, i32 1
-  store volatile <4 x float> %vecins12254, <4 x float>* undef, align 16
+  store <4 x float> %vecins12254, <4 x float>* undef, align 16
   %tmp1168 = load <4 x float>, <4 x float>* undef, align 16
   %vecext12255 = extractelement <4 x float> %tmp1168, i32 2
   %add12256 = fadd float %vecext12255, 0x40554CCCC0000000
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  %add13141 = fadd float %val, 0x40768999A0000000
+  store <4 x float> undef, <4 x float>* undef, align 16
+  %add13141 = fadd float undef, 0x40768999A0000000
   %tmp1169 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13142 = insertelement <4 x float> %tmp1169, float %add13141, i32 3
-  store volatile <4 x float> %vecins13142, <4 x float>* undef, align 16
+  store <4 x float> %vecins13142, <4 x float>* undef, align 16
   %tmp1170 = load <4 x float>, <4 x float>* undef
   %add13144 = fadd <4 x float> undef, %tmp1170
-  store volatile <4 x float> %add13144, <4 x float>* undef, align 16
+  store <4 x float> %add13144, <4 x float>* undef, align 16
   %tmp1171 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13145 = extractelement <4 x float> %tmp1171, i32 0
   %add13146 = fadd float %vecext13145, 3.975000e+02
@@ -6630,137 +6630,137 @@ entry:
   %add13379 = fadd float %vecext13378, 0xC053B33340000000
   %tmp1173 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13380 = insertelement <4 x float> %tmp1173, float %add13379, i32 3
-  store volatile <4 x float> %vecins13380, <4 x float>* undef, align 16
+  store <4 x float> %vecins13380, <4 x float>* undef, align 16
   %tmp1174 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins13408 = insertelement <4 x float> %tmp1174, float %val, i32 3
-  store volatile <4 x float> %vecins13408, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0xC0455999A0000000, float 0xC07D366660000000, float 4.240000e+02, float -1.670000e+02>, <4 x float>* undef
+  %vecins13408 = insertelement <4 x float> %tmp1174, float undef, i32 3
+  store <4 x float> %vecins13408, <4 x float>* undef, align 16
+  store <4 x float> <float 0xC0455999A0000000, float 0xC07D366660000000, float 4.240000e+02, float -1.670000e+02>, <4 x float>* undef
   %tmp1175 = load <4 x float>, <4 x float>* undef
   %tmp1176 = load <4 x float>, <4 x float>* undef, align 16
   %add13410 = fadd <4 x float> %tmp1176, %tmp1175
-  store volatile <4 x float> %add13410, <4 x float>* undef, align 16
+  store <4 x float> %add13410, <4 x float>* undef, align 16
   %tmp1177 = load <4 x float>, <4 x float>* undef, align 16
-  %add13412 = fadd float %val, 0xC0708999A0000000
+  %add13412 = fadd float undef, 0xC0708999A0000000
   %tmp1178 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13413 = insertelement <4 x float> %tmp1178, float %add13412, i32 0
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %vecext13428 = extractelement <4 x float> undef, i32 1
   %add13429 = fadd float %vecext13428, 0xC063BCCCC0000000
   %tmp1179 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13430 = insertelement <4 x float> %tmp1179, float %add13429, i32 1
-  store volatile <4 x float> %vecins13430, <4 x float>* undef, align 16
+  store <4 x float> %vecins13430, <4 x float>* undef, align 16
   %tmp1180 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13431 = extractelement <4 x float> %tmp1180, i32 2
-  %vecins13433 = insertelement <4 x float> undef, float %val, i32 2
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  %add13449 = fadd float %val, 4.590000e+02
+  %vecins13433 = insertelement <4 x float> undef, float undef, i32 2
+  store <4 x float> undef, <4 x float>* undef, align 16
+  %add13449 = fadd float undef, 4.590000e+02
   %tmp1181 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13450 = insertelement <4 x float> %tmp1181, float %add13449, i32 3
-  store volatile <4 x float> %vecins13450, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0xC073A66660000000, float 0xC041B33340000000, float 0x4066233340000000, float 0x4071C33340000000>, <4 x float>* undef
+  store <4 x float> %vecins13450, <4 x float>* undef, align 16
+  store <4 x float> <float 0xC073A66660000000, float 0xC041B33340000000, float 0x4066233340000000, float 0x4071C33340000000>, <4 x float>* undef
   %tmp1182 = load <4 x float>, <4 x float>* undef
   %tmp1183 = load <4 x float>, <4 x float>* undef, align 16
   %add13452 = fadd <4 x float> %tmp1183, %tmp1182
-  store volatile <4 x float> %add13452, <4 x float>* undef, align 16
+  store <4 x float> %add13452, <4 x float>* undef, align 16
   %tmp1184 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13453 = extractelement <4 x float> %tmp1184, i32 0
   %add13454 = fadd float %vecext13453, 0xC072866660000000
   %tmp1185 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13455 = insertelement <4 x float> %tmp1185, float %add13454, i32 0
-  %add13471 = fadd float %val, 0xC0556CCCC0000000
+  %add13471 = fadd float undef, 0xC0556CCCC0000000
   %tmp1186 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13472 = insertelement <4 x float> %tmp1186, float %add13471, i32 1
-  store volatile <4 x float> %vecins13472, <4 x float>* undef, align 16
+  store <4 x float> %vecins13472, <4 x float>* undef, align 16
   %tmp1187 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13473 = extractelement <4 x float> %tmp1187, i32 2
   %add13474 = fadd float %vecext13473, 0xC0786999A0000000
   %tmp1188 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13475 = insertelement <4 x float> %tmp1188, float %add13474, i32 2
-  store volatile <4 x float> %vecins13475, <4 x float>* undef, align 16
-  %add13477 = fadd float %val, 0xC07C3E6660000000
+  store <4 x float> %vecins13475, <4 x float>* undef, align 16
+  %add13477 = fadd float undef, 0xC07C3E6660000000
   %tmp1189 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13478 = insertelement <4 x float> %tmp1189, float %add13477, i32 3
-  store volatile <4 x float> %vecins13478, <4 x float>* undef, align 16
-  store volatile <4 x float> <float -4.740000e+02, float 0x4023CCCCC0000000, float 0xC05C266660000000, float 0x407B7199A0000000>, <4 x float>* undef
+  store <4 x float> %vecins13478, <4 x float>* undef, align 16
+  store <4 x float> <float -4.740000e+02, float 0x4023CCCCC0000000, float 0xC05C266660000000, float 0x407B7199A0000000>, <4 x float>* undef
   %tmp1190 = load <4 x float>, <4 x float>* undef, align 16
   %add13480 = fadd <4 x float> %tmp1190, undef
-  store volatile <4 x float> %add13480, <4 x float>* undef, align 16
+  store <4 x float> %add13480, <4 x float>* undef, align 16
   %tmp1191 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13481 = extractelement <4 x float> %tmp1191, i32 0
   %add13482 = fadd float %vecext13481, 0xC07BA4CCC0000000
   %tmp1192 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13483 = insertelement <4 x float> %tmp1192, float %add13482, i32 0
-  store volatile <4 x float> %vecins13483, <4 x float>* undef, align 16
+  store <4 x float> %vecins13483, <4 x float>* undef, align 16
   %tmp1193 = load <4 x float>, <4 x float>* undef, align 16
-  %add13485 = fadd float %val, 0x406B1999A0000000
+  %add13485 = fadd float undef, 0x406B1999A0000000
   %tmp1194 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13486 = insertelement <4 x float> %tmp1194, float %add13485, i32 1
-  store volatile <4 x float> %vecins13486, <4 x float>* undef, align 16
+  store <4 x float> %vecins13486, <4 x float>* undef, align 16
   %tmp1195 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13487 = extractelement <4 x float> %tmp1195, i32 2
   %add13488 = fadd float %vecext13487, 0x40647999A0000000
   %tmp1196 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13489 = insertelement <4 x float> %tmp1196, float %add13488, i32 2
-  store volatile <4 x float> %vecins13489, <4 x float>* undef, align 16
+  store <4 x float> %vecins13489, <4 x float>* undef, align 16
   %tmp1197 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13490 = extractelement <4 x float> %tmp1197, i32 3
   %tmp1198 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins13492 = insertelement <4 x float> %tmp1198, float %val, i32 3
-  store volatile <4 x float> %vecins13492, <4 x float>* undef, align 16
+  %vecins13492 = insertelement <4 x float> %tmp1198, float undef, i32 3
+  store <4 x float> %vecins13492, <4 x float>* undef, align 16
   %tmp1199 = load <4 x float>, <4 x float>* %.compoundliteral13493
   %tmp1200 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
-  %vecins13548 = insertelement <4 x float> undef, float %val, i32 3
-  store volatile <4 x float> <float 4.540000e+02, float 3.760000e+02, float 0x406EA33340000000, float 0x405AACCCC0000000>, <4 x float>* %.compoundliteral13549
+  store <4 x float> undef, <4 x float>* undef, align 16
+  %vecins13548 = insertelement <4 x float> undef, float undef, i32 3
+  store <4 x float> <float 4.540000e+02, float 3.760000e+02, float 0x406EA33340000000, float 0x405AACCCC0000000>, <4 x float>* %.compoundliteral13549
   %tmp1201 = load <4 x float>, <4 x float>* undef, align 16
-  %add13552 = fadd float %val, 3.230000e+02
+  %add13552 = fadd float undef, 3.230000e+02
   %tmp1202 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13553 = insertelement <4 x float> %tmp1202, float %add13552, i32 0
   %tmp1203 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13554 = extractelement <4 x float> %tmp1203, i32 1
   %tmp1204 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins13556 = insertelement <4 x float> %tmp1204, float %val, i32 1
-  store volatile <4 x float> %vecins13556, <4 x float>* undef, align 16
+  %vecins13556 = insertelement <4 x float> %tmp1204, float undef, i32 1
+  store <4 x float> %vecins13556, <4 x float>* undef, align 16
   %tmp1205 = load <4 x float>, <4 x float>* undef, align 16
-  %add13558 = fadd float %val, 2.625000e+02
+  %add13558 = fadd float undef, 2.625000e+02
   %tmp1206 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13559 = insertelement <4 x float> %tmp1206, float %add13558, i32 2
-  store volatile <4 x float> %vecins13559, <4 x float>* undef, align 16
-  %add13575 = fadd float %val, -4.725000e+02
+  store <4 x float> %vecins13559, <4 x float>* undef, align 16
+  %add13575 = fadd float undef, -4.725000e+02
   %tmp1207 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13576 = insertelement <4 x float> %tmp1207, float %add13575, i32 3
-  store volatile <4 x float> %vecins13576, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x40334CCCC0000000, float 0xC0785CCCC0000000, float 0xC078D66660000000, float 3.745000e+02>, <4 x float>* undef
+  store <4 x float> %vecins13576, <4 x float>* undef, align 16
+  store <4 x float> <float 0x40334CCCC0000000, float 0xC0785CCCC0000000, float 0xC078D66660000000, float 3.745000e+02>, <4 x float>* undef
   %tmp1208 = load <4 x float>, <4 x float>* undef
   %tmp1209 = load <4 x float>, <4 x float>* undef, align 16
   %add13578 = fadd <4 x float> %tmp1209, %tmp1208
-  store volatile <4 x float> %add13578, <4 x float>* undef, align 16
+  store <4 x float> %add13578, <4 x float>* undef, align 16
   %tmp1210 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1211 = load <4 x float>, <4 x float>* undef, align 16
   %add13592 = fadd <4 x float> %tmp1211, undef
-  store volatile <4 x float> %add13592, <4 x float>* undef, align 16
+  store <4 x float> %add13592, <4 x float>* undef, align 16
   %tmp1212 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13593 = extractelement <4 x float> %tmp1212, i32 0
   %add13594 = fadd float %vecext13593, 0xC0708B3340000000
   %tmp1213 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1214 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13596 = extractelement <4 x float> %tmp1214, i32 1
   %add13597 = fadd float %vecext13596, 0x40660999A0000000
-  %vecins13604 = insertelement <4 x float> undef, float %val, i32 3
-  store volatile <4 x float> %vecins13604, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407B4999A0000000, float 0xC067F66660000000, float 0xC068F999A0000000, float 0xC079233340000000>, <4 x float>* undef
+  %vecins13604 = insertelement <4 x float> undef, float undef, i32 3
+  store <4 x float> %vecins13604, <4 x float>* undef, align 16
+  store <4 x float> <float 0x407B4999A0000000, float 0xC067F66660000000, float 0xC068F999A0000000, float 0xC079233340000000>, <4 x float>* undef
   %tmp1215 = load <4 x float>, <4 x float>* undef, align 16
   %add13606 = fadd <4 x float> %tmp1215, undef
   %tmp1216 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13607 = extractelement <4 x float> %tmp1216, i32 0
-  %vecins13609 = insertelement <4 x float> undef, float %val, i32 0
+  %vecins13609 = insertelement <4 x float> undef, float undef, i32 0
   %tmp1217 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1218 = load <4 x float>, <4 x float>* undef, align 16
-  %add13622 = fadd float %val, -3.390000e+02
+  %add13622 = fadd float undef, -3.390000e+02
   %vecins13623 = insertelement <4 x float> undef, float %add13622, i32 0
-  store volatile <4 x float> %vecins13623, <4 x float>* undef, align 16
+  store <4 x float> %vecins13623, <4 x float>* undef, align 16
   %tmp1219 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13624 = extractelement <4 x float> %tmp1219, i32 1
   %add13625 = fadd float %vecext13624, 0x405C3999A0000000
@@ -6772,41 +6772,41 @@ entry:
   %add13631 = fadd float %vecext13630, 0xC060333340000000
   %tmp1222 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13632 = insertelement <4 x float> %tmp1222, float %add13631, i32 3
-  store volatile <4 x float> %vecins13632, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x4078D66660000000, float 0x4048B33340000000, float 0x4051466660000000, float -2.965000e+02>, <4 x float>* undef
+  store <4 x float> %vecins13632, <4 x float>* undef, align 16
+  store <4 x float> <float 0x4078D66660000000, float 0x4048B33340000000, float 0x4051466660000000, float -2.965000e+02>, <4 x float>* undef
   %tmp1223 = load <4 x float>, <4 x float>* undef
   %tmp1224 = load <4 x float>, <4 x float>* undef, align 16
   %add13634 = fadd <4 x float> %tmp1224, %tmp1223
-  store volatile <4 x float> %add13634, <4 x float>* undef, align 16
+  store <4 x float> %add13634, <4 x float>* undef, align 16
   %vecext13635 = extractelement <4 x float> undef, i32 0
   %add13636 = fadd float %vecext13635, 0x406A5999A0000000
   %tmp1225 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13637 = insertelement <4 x float> %tmp1225, float %add13636, i32 0
-  store volatile <4 x float> %vecins13637, <4 x float>* undef, align 16
+  store <4 x float> %vecins13637, <4 x float>* undef, align 16
   %tmp1226 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1227 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins13643 = insertelement <4 x float> %tmp1227, float %val, i32 2
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  %vecins13643 = insertelement <4 x float> %tmp1227, float undef, i32 2
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1228 = load <4 x float>, <4 x float>* undef, align 16
-  %add13785 = fadd float %val, 0x4068866660000000
+  %add13785 = fadd float undef, 0x4068866660000000
   %tmp1229 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13786 = insertelement <4 x float> %tmp1229, float %add13785, i32 3
-  store volatile <4 x float> %vecins13786, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x407704CCC0000000, float 0x4047B33340000000, float 0x40797B3340000000, float 0xC0652CCCC0000000>, <4 x float>* %.compoundliteral13787
+  store <4 x float> %vecins13786, <4 x float>* undef, align 16
+  store <4 x float> <float 0x407704CCC0000000, float 0x4047B33340000000, float 0x40797B3340000000, float 0xC0652CCCC0000000>, <4 x float>* %.compoundliteral13787
   %tmp1230 = load <4 x float>, <4 x float>* undef, align 16
   %add13788 = fadd <4 x float> %tmp1230, undef
   %tmp1231 = load <4 x float>, <4 x float>* undef
   %tmp1232 = load <4 x float>, <4 x float>* undef, align 16
   %add13802 = fadd <4 x float> %tmp1232, %tmp1231
-  store volatile <4 x float> %add13802, <4 x float>* undef, align 16
+  store <4 x float> %add13802, <4 x float>* undef, align 16
   %tmp1233 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13803 = extractelement <4 x float> %tmp1233, i32 0
   %add13804 = fadd float %vecext13803, -2.900000e+01
   %tmp1234 = load <4 x float>, <4 x float>* undef, align 16
   %vecins13805 = insertelement <4 x float> %tmp1234, float %add13804, i32 0
-  store volatile <4 x float> %vecins13805, <4 x float>* undef, align 16
+  store <4 x float> %vecins13805, <4 x float>* undef, align 16
   %tmp1235 = load <4 x float>, <4 x float>* undef, align 16
-  %add13807 = fadd float %val, 6.400000e+01
+  %add13807 = fadd float undef, 6.400000e+01
   %tmp1236 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1237 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13809 = extractelement <4 x float> %tmp1237, i32 2
@@ -6814,28 +6814,28 @@ entry:
   %vecext13812 = extractelement <4 x float> %tmp1238, i32 3
   %add13813 = fadd float %vecext13812, -3.615000e+02
   %vecins13814 = insertelement <4 x float> undef, float %add13813, i32 3
-  store volatile <4 x float> %vecins13814, <4 x float>* undef, align 16
-  store volatile <4 x float> <float -2.270000e+02, float -1.500000e+01, float 0x407084CCC0000000, float -1.425000e+02>, <4 x float>* undef
+  store <4 x float> %vecins13814, <4 x float>* undef, align 16
+  store <4 x float> <float -2.270000e+02, float -1.500000e+01, float 0x407084CCC0000000, float -1.425000e+02>, <4 x float>* undef
   %tmp1239 = load <4 x float>, <4 x float>* undef
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1240 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13817 = extractelement <4 x float> %tmp1240, i32 0
-  %vecins13856 = insertelement <4 x float> undef, float %val, i32 3
-  store volatile <4 x float> %vecins13856, <4 x float>* undef, align 16
-  store volatile <4 x float> <float 0x40656CCCC0000000, float 0xC0656999A0000000, float 0x40778E6660000000, float 0x407ECE6660000000>, <4 x float>* undef
+  %vecins13856 = insertelement <4 x float> undef, float undef, i32 3
+  store <4 x float> %vecins13856, <4 x float>* undef, align 16
+  store <4 x float> <float 0x40656CCCC0000000, float 0xC0656999A0000000, float 0x40778E6660000000, float 0x407ECE6660000000>, <4 x float>* undef
   %tmp1241 = load <4 x float>, <4 x float>* undef
   %tmp1242 = load <4 x float>, <4 x float>* undef, align 16
-  store volatile <4 x float> undef, <4 x float>* undef, align 16
+  store <4 x float> undef, <4 x float>* undef, align 16
   %tmp1243 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13859 = extractelement <4 x float> %tmp1243, i32 0
   %tmp1244 = load <4 x float>, <4 x float>* undef, align 16
-  %vecins13861 = insertelement <4 x float> %tmp1244, float %val, i32 0
+  %vecins13861 = insertelement <4 x float> %tmp1244, float undef, i32 0
   %tmp1245 = load <4 x float>, <4 x float>* undef, align 16
   %vecext13862 = extractelement <4 x float> %tmp1245, i32 1
   %add13863 = fadd float %vecext13862, -1.380000e+02
   %vecins13864 = insertelement <4 x float> undef, float %add13863, i32 1
-  %vecins13867 = insertelement <4 x float> undef, float %val, i32 2
-  store volatile <4 x float> %vecins13867, <4 x float>* undef, align 16
+  %vecins13867 = insertelement <4 x float> undef, float undef, i32 2
+  store <4 x float> %vecins13867, <4 x float>* undef, align 16
   %tmp1246 = load <4 x float>, <4 x float>* undef, align 16
   %tmp1247 = load <4 x float>, <4 x float>* undef, align 16
   ret <4 x float> undef

diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
index 546ffdd66ff8..c0c691fc5b0a 100644
--- a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
@@ -1,13 +1,13 @@
 ; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s
 
 ; CHECK-LABEL: danny:
-; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0)
-; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2)
-; CHECK:     [[T0]] |= asl([[T1]],#16)
-; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4)
-; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6)
-; CHECK:     [[T2]] |= asl([[T3]],#16)
-; CHECK:     combine([[T2]],[[T0]])
+; CHECK:     r1 = r0
+; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r1+#0)
+; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r1+#2)
+; CHECK:     r2 |= asl([[T1]],#16)
+; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r1+#4)
+; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r1+#6)
+; CHECK:     r1 |= asl([[T3]],#16)
 define <4 x i16> @danny(<4 x i16>* %p) {
   %t0 = load <4 x i16>, <4 x i16>* %p, align 2
   ret <4 x i16> %t0
@@ -15,8 +15,8 @@ define <4 x i16> @danny(<4 x i16>* %p) {
 
 ; CHECK-LABEL: sammy:
 ; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0)
-; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4)
-; CHECK:     combine([[T1]],[[T0]])
+; CHECK-DAG: r1 = memw(r0+#4)
+; CHECK:     r0 = [[T0]]
 define <4 x i16> @sammy(<4 x i16>* %p) {
   %t0 = load <4 x i16>, <4 x i16>* %p, align 4
   ret <4 x i16> %t0

diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
index 9025af913ad7..f22fbcc7b73e 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
@@ -244,12 +244,12 @@ define void @cxiiiiconv() {
   ; ALL-DAG:        lw      $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
   ; ALL-DAG:        lbu     $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
   ; 32R1-DAG:       sll     $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24
-  ; 32R1-DAG:       sra     $5, $[[REG_C1_1]], 24
-  ; 32R2-DAG:       seb     $5, $[[REG_C1]]
+  ; 32R1-DAG:       sra     $4, $[[REG_C1_1]], 24
+  ; 32R2-DAG:       seb     $4, $[[REG_C1]]
   ; FIXME: andi is superfluous
   ; ALL-DAG:        lw      $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
   ; ALL-DAG:        lbu     $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]])
-  ; ALL-DAG:        andi    $4, $[[REG_UC1]], 255
+  ; ALL-DAG:        andi    $5, $[[REG_UC1]], 255
   ; ALL-DAG:        lw      $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
   ; ALL-DAG:        lhu     $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
   ; 32R1-DAG:       sll     $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16

diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
index 3e30f75d6f4a..1a2ad44b0a6b 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll
@@ -17,15 +17,9 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1)
 define void @cpy(i8* %src, i32 %i) {
   ; ALL-LABEL:  cpy:
 
-  ; ALL-DAG:        lw    $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
-  ; ALL-DAG:        sw    $4, 24($sp)
-  ; ALL-DAG:        move  $4, $[[T0]]
-  ; ALL-DAG:        sw    $5, 20($sp)
-  ; ALL-DAG:        lw    $[[T1:[0-9]+]], 24($sp)
-  ; ALL-DAG:        move  $5, $[[T1]]
-  ; ALL-DAG:        lw    $6, 20($sp)
-  ; ALL-DAG:        lw    $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}})
-  ; ALL:            jalr  $[[T2]]
+  ; ALL:        lw    $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+  ; ALL:        lw    $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}})
+  ; ALL:        jalr  $[[T2]]
   ; ALL-NEXT:       nop
   ; ALL-NOT:        {{.*}}$2{{.*}}
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false)
@@ -36,14 +30,8 @@ define void @mov(i8* %src, i32 %i) {
   ; ALL-LABEL:  mov:
 
 
-  ; ALL-DAG:        lw    $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
-  ; ALL-DAG:        sw    $4, 24($sp)
-  ; ALL-DAG:        move  $4, $[[T0]]
-  ; ALL-DAG:        sw    $5, 20($sp)
-  ; ALL-DAG:        lw    $[[T1:[0-9]+]], 24($sp)
-  ; ALL-DAG:        move  $5, $[[T1]]
-  ; ALL-DAG:        lw    $6, 20($sp)
-  ; ALL-DAG:        lw    $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}})
+  ; ALL:        lw    $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+  ; ALL:        lw    $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}})
   ; ALL:            jalr  $[[T2]]
   ; ALL-NEXT:       nop
   ; ALL-NOT:        {{.*}}$2{{.*}}
@@ -54,15 +42,8 @@ define void @mov(i8* %src, i32 %i) {
 define void @clear(i32 %i) {
   ; ALL-LABEL:  clear:
 
-  ; ALL-DAG:        lw    $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
-  ; ALL-DAG:        sw    $4, 16($sp)
-  ; ALL-DAG:        move  $4, $[[T0]]
-  ; ALL-DAG:        addiu $[[T1:[0-9]+]], $zero, 42
-  ; 32R1-DAG:       sll   $[[T2:[0-9]+]], $[[T1]], 24
-  ; 32R1-DAG:       sra   $5, $[[T2]], 24
-  ; 32R2-DAG:       seb   $5, $[[T1]]
-  ; ALL-DAG:        lw    $6, 16($sp)
-  ; ALL-DAG:        lw    $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}})
+  ; ALL:        lw    $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}})
+  ; ALL:        lw    $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}})
   ; ALL:            jalr  $[[T2]]
   ; ALL-NEXT:       nop
   ; ALL-NOT:        {{.*}}$2{{.*}}

diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll b/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
index 6befe70270df..e482a13f3d5c 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll
@@ -5,9 +5,10 @@ define void @test(i32 %x, i1* %p) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    move $1, $4
-; CHECK-NEXT:    andi $2, $4, 1
-; CHECK-NEXT:    sb $2, 0($5)
+; CHECK-NEXT:    move $4, $1
 ; CHECK-NEXT:    andi $1, $1, 1
+; CHECK-NEXT:    sb $1, 0($5)
+; CHECK-NEXT:    andi $1, $4, 1
 ; CHECK-NEXT:    bgtz $1, $BB0_1
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  # %bb.1: # %foo

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
index 0b217b837479..fe26837d0a34 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll
@@ -86,12 +86,11 @@ entry:
 define i64 @add_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: add_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addu $1, $6, $4
-; MIPS32-NEXT:    sltu $2, $1, $4
-; MIPS32-NEXT:    addu $3, $7, $5
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    addu $3, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    addu $2, $6, $4
+; MIPS32-NEXT:    sltu $3, $2, $4
+; MIPS32-NEXT:    addu $1, $7, $5
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    addu $3, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -102,34 +101,30 @@ entry:
 define i128 @add_i128(i128 %a, i128 %b) {
 ; MIPS32-LABEL: add_i128:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $sp, $sp, -8
-; MIPS32-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32-NEXT:    move $8, $4
+; MIPS32-NEXT:    move $3, $5
+; MIPS32-NEXT:    move $4, $6
+; MIPS32-NEXT:    addiu $1, $sp, 16
+; MIPS32-NEXT:    lw $2, 0($1)
+; MIPS32-NEXT:    addiu $1, $sp, 20
+; MIPS32-NEXT:    lw $6, 0($1)
 ; MIPS32-NEXT:    addiu $1, $sp, 24
+; MIPS32-NEXT:    lw $5, 0($1)
+; MIPS32-NEXT:    addiu $1, $sp, 28
 ; MIPS32-NEXT:    lw $1, 0($1)
-; MIPS32-NEXT:    addiu $2, $sp, 28
-; MIPS32-NEXT:    lw $2, 0($2)
-; MIPS32-NEXT:    addiu $3, $sp, 32
-; MIPS32-NEXT:    lw $3, 0($3)
-; MIPS32-NEXT:    addiu $8, $sp, 36
-; MIPS32-NEXT:    lw $8, 0($8)
-; MIPS32-NEXT:    addu $1, $1, $4
-; MIPS32-NEXT:    sltu $4, $1, $4
-; MIPS32-NEXT:    addu $5, $2, $5
-; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    addu $2, $2, $8
+; MIPS32-NEXT:    sltu $8, $2, $8
+; MIPS32-NEXT:    addu $3, $6, $3
+; MIPS32-NEXT:    andi $8, $8, 1
+; MIPS32-NEXT:    addu $3, $3, $8
+; MIPS32-NEXT:    sltu $6, $3, $6
 ; MIPS32-NEXT:    addu $4, $5, $4
-; MIPS32-NEXT:    sltu $2, $4, $2
-; MIPS32-NEXT:    addu $5, $3, $6
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    addu $2, $5, $2
-; MIPS32-NEXT:    sltu $3, $2, $3
-; MIPS32-NEXT:    addu $5, $8, $7
-; MIPS32-NEXT:    andi $3, $3, 1
-; MIPS32-NEXT:    addu $5, $5, $3
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    move $2, $1
-; MIPS32-NEXT:    move $3, $4
-; MIPS32-NEXT:    lw $4, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    addiu $sp, $sp, 8
+; MIPS32-NEXT:    andi $6, $6, 1
+; MIPS32-NEXT:    addu $4, $4, $6
+; MIPS32-NEXT:    sltu $5, $4, $5
+; MIPS32-NEXT:    addu $1, $1, $7
+; MIPS32-NEXT:    andi $5, $5, 1
+; MIPS32-NEXT:    addu $5, $1, $5
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
index 5d8585173b99..74ecbf6ed7a8 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll
@@ -4,9 +4,9 @@
 define void @add_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) {
 ; P5600-LABEL: add_v16i8:
 ; P5600:       # %bb.0: # %entry
-; P5600-NEXT:    ld.b $w0, 0($4)
-; P5600-NEXT:    ld.b $w1, 0($5)
-; P5600-NEXT:    addv.b $w0, $w1, $w0
+; P5600-NEXT:    ld.b $w1, 0($4)
+; P5600-NEXT:    ld.b $w0, 0($5)
+; P5600-NEXT:    addv.b $w0, $w0, $w1
 ; P5600-NEXT:    st.b $w0, 0($6)
 ; P5600-NEXT:    jr $ra
 ; P5600-NEXT:    nop
@@ -21,9 +21,9 @@ entry:
 define void @add_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) {
 ; P5600-LABEL: add_v8i16:
 ; P5600:       # %bb.0: # %entry
-; P5600-NEXT:    ld.h $w0, 0($4)
-; P5600-NEXT:    ld.h $w1, 0($5)
-; P5600-NEXT:    addv.h $w0, $w1, $w0
+; P5600-NEXT:    ld.h $w1, 0($4)
+; P5600-NEXT:    ld.h $w0, 0($5)
+; P5600-NEXT:    addv.h $w0, $w0, $w1
 ; P5600-NEXT:    st.h $w0, 0($6)
 ; P5600-NEXT:    jr $ra
 ; P5600-NEXT:    nop
@@ -38,9 +38,9 @@ entry:
 define void @add_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) {
 ; P5600-LABEL: add_v4i32:
 ; P5600:       # %bb.0: # %entry
-; P5600-NEXT:    ld.w $w0, 0($4)
-; P5600-NEXT:    ld.w $w1, 0($5)
-; P5600-NEXT:    addv.w $w0, $w1, $w0
+; P5600-NEXT:    ld.w $w1, 0($4)
+; P5600-NEXT:    ld.w $w0, 0($5)
+; P5600-NEXT:    addv.w $w0, $w0, $w1
 ; P5600-NEXT:    st.w $w0, 0($6)
 ; P5600-NEXT:    jr $ra
 ; P5600-NEXT:    nop
@@ -55,9 +55,9 @@ entry:
 define void @add_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) {
 ; P5600-LABEL: add_v2i64:
 ; P5600:       # %bb.0: # %entry
-; P5600-NEXT:    ld.d $w0, 0($4)
-; P5600-NEXT:    ld.d $w1, 0($5)
-; P5600-NEXT:    addv.d $w0, $w1, $w0
+; P5600-NEXT:    ld.d $w1, 0($4)
+; P5600-NEXT:    ld.d $w0, 0($5)
+; P5600-NEXT:    addv.d $w0, $w0, $w1
 ; P5600-NEXT:    st.d $w0, 0($6)
 ; P5600-NEXT:    jr $ra
 ; P5600-NEXT:    nop

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
index a9f49c025b95..32bc78827baf 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll
@@ -6,10 +6,10 @@ define { float, float } @add_complex_float({ float, float }* %a, { float, float
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lwc1 $f0, 0($4)
 ; MIPS32-NEXT:    lwc1 $f1, 4($4)
-; MIPS32-NEXT:    lwc1 $f2, 0($5)
-; MIPS32-NEXT:    lwc1 $f3, 4($5)
-; MIPS32-NEXT:    add.s $f0, $f0, $f2
-; MIPS32-NEXT:    add.s $f2, $f1, $f3
+; MIPS32-NEXT:    lwc1 $f3, 0($5)
+; MIPS32-NEXT:    lwc1 $f2, 4($5)
+; MIPS32-NEXT:    add.s $f0, $f0, $f3
+; MIPS32-NEXT:    add.s $f2, $f1, $f2
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -33,10 +33,10 @@ define { double, double } @add_complex_double({ double, double }* %a, { double,
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    ldc1 $f0, 0($4)
 ; MIPS32-NEXT:    ldc1 $f2, 8($4)
-; MIPS32-NEXT:    ldc1 $f4, 0($5)
-; MIPS32-NEXT:    ldc1 $f6, 8($5)
-; MIPS32-NEXT:    add.d $f0, $f0, $f4
-; MIPS32-NEXT:    add.d $f2, $f2, $f6
+; MIPS32-NEXT:    ldc1 $f6, 0($5)
+; MIPS32-NEXT:    ldc1 $f4, 8($5)
+; MIPS32-NEXT:    add.d $f0, $f0, $f6
+; MIPS32-NEXT:    add.d $f2, $f2, $f4
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -66,9 +66,9 @@ define void @call_ret_complex_float({ float, float }* %z) {
 ; MIPS32-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    jal ret_complex_float
 ; MIPS32-NEXT:    nop
-; MIPS32-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    swc1 $f0, 0($1)
-; MIPS32-NEXT:    swc1 $f2, 4($1)
+; MIPS32-NEXT:    lw $4, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    swc1 $f0, 0($4)
+; MIPS32-NEXT:    swc1 $f2, 4($4)
 ; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 24
 ; MIPS32-NEXT:    jr $ra
@@ -95,9 +95,9 @@ define void @call_ret_complex_double({ double, double }* %z) {
 ; MIPS32-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    jal ret_complex_double
 ; MIPS32-NEXT:    nop
-; MIPS32-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sdc1 $f0, 0($1)
-; MIPS32-NEXT:    sdc1 $f2, 8($1)
+; MIPS32-NEXT:    lw $4, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sdc1 $f0, 0($4)
+; MIPS32-NEXT:    sdc1 $f2, 8($4)
 ; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 24
 ; MIPS32-NEXT:    jr $ra

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
index 662bcdf757b6..2bbbe4ac3ae2 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll
@@ -6,64 +6,64 @@ declare i32 @llvm.bitreverse.i32(i32)
 define i32 @bitreverse_i32(i32 signext %a) {
 ; MIPS32-LABEL: bitreverse_i32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $4, 24
-; MIPS32-NEXT:    srl $2, $4, 24
-; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    sll $2, $4, 24
+; MIPS32-NEXT:    srl $1, $4, 24
+; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    andi $2, $4, 65280
 ; MIPS32-NEXT:    sll $2, $2, 8
 ; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    srl $2, $4, 8
 ; MIPS32-NEXT:    andi $2, $2, 65280
-; MIPS32-NEXT:    or $1, $1, $2
-; MIPS32-NEXT:    lui $2, 61680
-; MIPS32-NEXT:    ori $2, $2, 61680
-; MIPS32-NEXT:    and $3, $1, $2
-; MIPS32-NEXT:    srl $3, $3, 4
-; MIPS32-NEXT:    sll $1, $1, 4
-; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    or $1, $3, $1
-; MIPS32-NEXT:    lui $2, 52428
-; MIPS32-NEXT:    ori $2, $2, 52428
-; MIPS32-NEXT:    and $3, $1, $2
-; MIPS32-NEXT:    srl $3, $3, 2
-; MIPS32-NEXT:    sll $1, $1, 2
-; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    or $1, $3, $1
-; MIPS32-NEXT:    lui $2, 43690
-; MIPS32-NEXT:    ori $2, $2, 43690
-; MIPS32-NEXT:    and $3, $1, $2
-; MIPS32-NEXT:    srl $3, $3, 1
-; MIPS32-NEXT:    sll $1, $1, 1
-; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    or $2, $3, $1
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    lui $1, 61680
+; MIPS32-NEXT:    ori $3, $1, 61680
+; MIPS32-NEXT:    and $1, $2, $3
+; MIPS32-NEXT:    srl $1, $1, 4
+; MIPS32-NEXT:    sll $2, $2, 4
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    lui $1, 52428
+; MIPS32-NEXT:    ori $3, $1, 52428
+; MIPS32-NEXT:    and $1, $2, $3
+; MIPS32-NEXT:    srl $1, $1, 2
+; MIPS32-NEXT:    sll $2, $2, 2
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    lui $1, 43690
+; MIPS32-NEXT:    ori $3, $1, 43690
+; MIPS32-NEXT:    and $1, $2, $3
+; MIPS32-NEXT:    srl $1, $1, 1
+; MIPS32-NEXT:    sll $2, $2, 1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    or $2, $1, $2
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R2-LABEL: bitreverse_i32:
 ; MIPS32R2:       # %bb.0: # %entry
 ; MIPS32R2-NEXT:    wsbh $1, $4
-; MIPS32R2-NEXT:    rotr $1, $1, 16
-; MIPS32R2-NEXT:    lui $2, 61680
-; MIPS32R2-NEXT:    ori $2, $2, 61680
-; MIPS32R2-NEXT:    and $3, $1, $2
-; MIPS32R2-NEXT:    srl $3, $3, 4
-; MIPS32R2-NEXT:    sll $1, $1, 4
-; MIPS32R2-NEXT:    and $1, $1, $2
-; MIPS32R2-NEXT:    or $1, $3, $1
-; MIPS32R2-NEXT:    lui $2, 52428
-; MIPS32R2-NEXT:    ori $2, $2, 52428
-; MIPS32R2-NEXT:    and $3, $1, $2
-; MIPS32R2-NEXT:    srl $3, $3, 2
-; MIPS32R2-NEXT:    sll $1, $1, 2
-; MIPS32R2-NEXT:    and $1, $1, $2
-; MIPS32R2-NEXT:    or $1, $3, $1
-; MIPS32R2-NEXT:    lui $2, 43690
-; MIPS32R2-NEXT:    ori $2, $2, 43690
-; MIPS32R2-NEXT:    and $3, $1, $2
-; MIPS32R2-NEXT:    srl $3, $3, 1
-; MIPS32R2-NEXT:    sll $1, $1, 1
-; MIPS32R2-NEXT:    and $1, $1, $2
-; MIPS32R2-NEXT:    or $2, $3, $1
+; MIPS32R2-NEXT:    rotr $2, $1, 16
+; MIPS32R2-NEXT:    lui $1, 61680
+; MIPS32R2-NEXT:    ori $3, $1, 61680
+; MIPS32R2-NEXT:    and $1, $2, $3
+; MIPS32R2-NEXT:    srl $1, $1, 4
+; MIPS32R2-NEXT:    sll $2, $2, 4
+; MIPS32R2-NEXT:    and $2, $2, $3
+; MIPS32R2-NEXT:    or $2, $1, $2
+; MIPS32R2-NEXT:    lui $1, 52428
+; MIPS32R2-NEXT:    ori $3, $1, 52428
+; MIPS32R2-NEXT:    and $1, $2, $3
+; MIPS32R2-NEXT:    srl $1, $1, 2
+; MIPS32R2-NEXT:    sll $2, $2, 2
+; MIPS32R2-NEXT:    and $2, $2, $3
+; MIPS32R2-NEXT:    or $2, $1, $2
+; MIPS32R2-NEXT:    lui $1, 43690
+; MIPS32R2-NEXT:    ori $3, $1, 43690
+; MIPS32R2-NEXT:    and $1, $2, $3
+; MIPS32R2-NEXT:    srl $1, $1, 1
+; MIPS32R2-NEXT:    sll $2, $2, 1
+; MIPS32R2-NEXT:    and $2, $2, $3
+; MIPS32R2-NEXT:    or $2, $1, $2
 ; MIPS32R2-NEXT:    jr $ra
 ; MIPS32R2-NEXT:    nop
 entry:
@@ -75,107 +75,107 @@ declare i64 @llvm.bitreverse.i64(i64)
 define i64 @bitreverse_i64(i64 signext %a) {
 ; MIPS32-LABEL: bitreverse_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $5, 24
-; MIPS32-NEXT:    srl $2, $5, 24
-; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    move $3, $4
+; MIPS32-NEXT:    sll $2, $5, 24
+; MIPS32-NEXT:    srl $1, $5, 24
+; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    andi $2, $5, 65280
 ; MIPS32-NEXT:    sll $2, $2, 8
 ; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    srl $2, $5, 8
 ; MIPS32-NEXT:    andi $2, $2, 65280
-; MIPS32-NEXT:    or $1, $1, $2
-; MIPS32-NEXT:    lui $2, 61680
-; MIPS32-NEXT:    ori $2, $2, 61680
-; MIPS32-NEXT:    and $3, $1, $2
-; MIPS32-NEXT:    srl $3, $3, 4
-; MIPS32-NEXT:    sll $1, $1, 4
-; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    or $1, $3, $1
-; MIPS32-NEXT:    lui $3, 52428
-; MIPS32-NEXT:    ori $3, $3, 52428
-; MIPS32-NEXT:    and $5, $1, $3
-; MIPS32-NEXT:    srl $5, $5, 2
-; MIPS32-NEXT:    sll $1, $1, 2
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    or $1, $5, $1
-; MIPS32-NEXT:    lui $5, 43690
-; MIPS32-NEXT:    ori $5, $5, 43690
-; MIPS32-NEXT:    and $6, $1, $5
-; MIPS32-NEXT:    srl $6, $6, 1
-; MIPS32-NEXT:    sll $1, $1, 1
-; MIPS32-NEXT:    and $1, $1, $5
-; MIPS32-NEXT:    or $1, $6, $1
-; MIPS32-NEXT:    sll $6, $4, 24
-; MIPS32-NEXT:    srl $7, $4, 24
-; MIPS32-NEXT:    or $6, $7, $6
-; MIPS32-NEXT:    andi $7, $4, 65280
-; MIPS32-NEXT:    sll $7, $7, 8
-; MIPS32-NEXT:    or $6, $6, $7
-; MIPS32-NEXT:    srl $4, $4, 8
-; MIPS32-NEXT:    andi $4, $4, 65280
-; MIPS32-NEXT:    or $4, $6, $4
-; MIPS32-NEXT:    and $6, $4, $2
-; MIPS32-NEXT:    srl $6, $6, 4
-; MIPS32-NEXT:    sll $4, $4, 4
-; MIPS32-NEXT:    and $2, $4, $2
-; MIPS32-NEXT:    or $2, $6, $2
-; MIPS32-NEXT:    and $4, $2, $3
-; MIPS32-NEXT:    srl $4, $4, 2
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    lui $1, 61680
+; MIPS32-NEXT:    ori $6, $1, 61680
+; MIPS32-NEXT:    and $1, $2, $6
+; MIPS32-NEXT:    srl $1, $1, 4
+; MIPS32-NEXT:    sll $2, $2, 4
+; MIPS32-NEXT:    and $2, $2, $6
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    lui $1, 52428
+; MIPS32-NEXT:    ori $5, $1, 52428
+; MIPS32-NEXT:    and $1, $2, $5
+; MIPS32-NEXT:    srl $1, $1, 2
 ; MIPS32-NEXT:    sll $2, $2, 2
-; MIPS32-NEXT:    and $2, $2, $3
-; MIPS32-NEXT:    or $2, $4, $2
-; MIPS32-NEXT:    and $3, $2, $5
-; MIPS32-NEXT:    srl $3, $3, 1
-; MIPS32-NEXT:    sll $2, $2, 1
 ; MIPS32-NEXT:    and $2, $2, $5
-; MIPS32-NEXT:    or $3, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    lui $1, 43690
+; MIPS32-NEXT:    ori $4, $1, 43690
+; MIPS32-NEXT:    and $1, $2, $4
+; MIPS32-NEXT:    srl $1, $1, 1
+; MIPS32-NEXT:    sll $2, $2, 1
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    or $2, $1, $2
+; MIPS32-NEXT:    sll $7, $3, 24
+; MIPS32-NEXT:    srl $1, $3, 24
+; MIPS32-NEXT:    or $1, $1, $7
+; MIPS32-NEXT:    andi $7, $3, 65280
+; MIPS32-NEXT:    sll $7, $7, 8
+; MIPS32-NEXT:    or $1, $1, $7
+; MIPS32-NEXT:    srl $3, $3, 8
+; MIPS32-NEXT:    andi $3, $3, 65280
+; MIPS32-NEXT:    or $3, $1, $3
+; MIPS32-NEXT:    and $1, $3, $6
+; MIPS32-NEXT:    srl $1, $1, 4
+; MIPS32-NEXT:    sll $3, $3, 4
+; MIPS32-NEXT:    and $3, $3, $6
+; MIPS32-NEXT:    or $3, $1, $3
+; MIPS32-NEXT:    and $1, $3, $5
+; MIPS32-NEXT:    srl $1, $1, 2
+; MIPS32-NEXT:    sll $3, $3, 2
+; MIPS32-NEXT:    and $3, $3, $5
+; MIPS32-NEXT:    or $3, $1, $3
+; MIPS32-NEXT:    and $1, $3, $4
+; MIPS32-NEXT:    srl $1, $1, 1
+; MIPS32-NEXT:    sll $3, $3, 1
+; MIPS32-NEXT:    and $3, $3, $4
+; MIPS32-NEXT:    or $3, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R2-LABEL: bitreverse_i64:
 ; MIPS32R2:       # %bb.0: # %entry
-; MIPS32R2-NEXT:    wsbh $1, $5
-; MIPS32R2-NEXT:    rotr $1, $1, 16
+; MIPS32R2-NEXT:    move $1, $4
+; MIPS32R2-NEXT:    wsbh $2, $5
+; MIPS32R2-NEXT:    rotr $3, $2, 16
 ; MIPS32R2-NEXT:    lui $2, 61680
-; MIPS32R2-NEXT:    ori $2, $2, 61680
-; MIPS32R2-NEXT:    and $3, $1, $2
-; MIPS32R2-NEXT:    srl $3, $3, 4
-; MIPS32R2-NEXT:    sll $1, $1, 4
-; MIPS32R2-NEXT:    and $1, $1, $2
-; MIPS32R2-NEXT:    or $1, $3, $1
-; MIPS32R2-NEXT:    lui $3, 52428
-; MIPS32R2-NEXT:    ori $3, $3, 52428
-; MIPS32R2-NEXT:    and $5, $1, $3
-; MIPS32R2-NEXT:    srl $5, $5, 2
-; MIPS32R2-NEXT:    sll $1, $1, 2
-; MIPS32R2-NEXT:    and $1, $1, $3
-; MIPS32R2-NEXT:    or $1, $5, $1
-; MIPS32R2-NEXT:    lui $5, 43690
-; MIPS32R2-NEXT:    ori $5, $5, 43690
-; MIPS32R2-NEXT:    and $6, $1, $5
-; MIPS32R2-NEXT:    srl $6, $6, 1
-; MIPS32R2-NEXT:    sll $1, $1, 1
-; MIPS32R2-NEXT:    and $1, $1, $5
-; MIPS32R2-NEXT:    or $1, $6, $1
-; MIPS32R2-NEXT:    wsbh $4, $4
-; MIPS32R2-NEXT:    rotr $4, $4, 16
-; MIPS32R2-NEXT:    and $6, $4, $2
-; MIPS32R2-NEXT:    srl $6, $6, 4
-; MIPS32R2-NEXT:    sll $4, $4, 4
-; MIPS32R2-NEXT:    and $2, $4, $2
-; MIPS32R2-NEXT:    or $2, $6, $2
-; MIPS32R2-NEXT:    and $4, $2, $3
-; MIPS32R2-NEXT:    srl $4, $4, 2
-; MIPS32R2-NEXT:    sll $2, $2, 2
-; MIPS32R2-NEXT:    and $2, $2, $3
-; MIPS32R2-NEXT:    or $2, $4, $2
-; MIPS32R2-NEXT:    and $3, $2, $5
-; MIPS32R2-NEXT:    srl $3, $3, 1
-; MIPS32R2-NEXT:    sll $2, $2, 1
-; MIPS32R2-NEXT:    and $2, $2, $5
-; MIPS32R2-NEXT:    or $3, $3, $2
-; MIPS32R2-NEXT:    move $2, $1
+; MIPS32R2-NEXT:    ori $6, $2, 61680
+; MIPS32R2-NEXT:    and $2, $3, $6
+; MIPS32R2-NEXT:    srl $2, $2, 4
+; MIPS32R2-NEXT:    sll $3, $3, 4
+; MIPS32R2-NEXT:    and $3, $3, $6
+; MIPS32R2-NEXT:    or $3, $2, $3
+; MIPS32R2-NEXT:    lui $2, 52428
+; MIPS32R2-NEXT:    ori $5, $2, 52428
+; MIPS32R2-NEXT:    and $2, $3, $5
+; MIPS32R2-NEXT:    srl $2, $2, 2
+; MIPS32R2-NEXT:    sll $3, $3, 2
+; MIPS32R2-NEXT:    and $3, $3, $5
+; MIPS32R2-NEXT:    or $3, $2, $3
+; MIPS32R2-NEXT:    lui $2, 43690
+; MIPS32R2-NEXT:    ori $4, $2, 43690
+; MIPS32R2-NEXT:    and $2, $3, $4
+; MIPS32R2-NEXT:    srl $2, $2, 1
+; MIPS32R2-NEXT:    sll $3, $3, 1
+; MIPS32R2-NEXT:    and $3, $3, $4
+; MIPS32R2-NEXT:    or $2, $2, $3
+; MIPS32R2-NEXT:    wsbh $1, $1
+; MIPS32R2-NEXT:    rotr $3, $1, 16
+; MIPS32R2-NEXT:    and $1, $3, $6
+; MIPS32R2-NEXT:    srl $1, $1, 4
+; MIPS32R2-NEXT:    sll $3, $3, 4
+; MIPS32R2-NEXT:    and $3, $3, $6
+; MIPS32R2-NEXT:    or $3, $1, $3
+; MIPS32R2-NEXT:    and $1, $3, $5
+; MIPS32R2-NEXT:    srl $1, $1, 2
+; MIPS32R2-NEXT:    sll $3, $3, 2
+; MIPS32R2-NEXT:    and $3, $3, $5
+; MIPS32R2-NEXT:    or $3, $1, $3
+; MIPS32R2-NEXT:    and $1, $3, $4
+; MIPS32R2-NEXT:    srl $1, $1, 1
+; MIPS32R2-NEXT:    sll $3, $3, 1
+; MIPS32R2-NEXT:    and $3, $3, $4
+; MIPS32R2-NEXT:    or $3, $1, $3
 ; MIPS32R2-NEXT:    jr $ra
 ; MIPS32R2-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
index 4022efcafb64..803b76cbc51a 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll
@@ -320,10 +320,10 @@ define i8 @ashr_i8(i8 %a) {
 ; MIPS32-LABEL: ashr_i8:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    ori $1, $zero, 2
-; MIPS32-NEXT:    andi $1, $1, 255
-; MIPS32-NEXT:    sll $2, $4, 24
-; MIPS32-NEXT:    sra $2, $2, 24
-; MIPS32-NEXT:    srav $2, $2, $1
+; MIPS32-NEXT:    andi $2, $1, 255
+; MIPS32-NEXT:    sll $1, $4, 24
+; MIPS32-NEXT:    sra $1, $1, 24
+; MIPS32-NEXT:    srav $2, $1, $2
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -335,9 +335,9 @@ define i16 @lshr_i16(i16 %a) {
 ; MIPS32-LABEL: lshr_i16:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    ori $1, $zero, 2
-; MIPS32-NEXT:    andi $1, $1, 65535
-; MIPS32-NEXT:    andi $2, $4, 65535
-; MIPS32-NEXT:    srlv $2, $2, $1
+; MIPS32-NEXT:    andi $2, $1, 65535
+; MIPS32-NEXT:    andi $1, $4, 65535
+; MIPS32-NEXT:    srlv $2, $1, $2
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -348,29 +348,25 @@ entry:
 define i64 @shl_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: shl_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $sp, $sp, -8
-; MIPS32-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32-NEXT:    move $3, $4
+; MIPS32-NEXT:    move $9, $6
 ; MIPS32-NEXT:    ori $1, $zero, 32
-; MIPS32-NEXT:    subu $2, $6, $1
-; MIPS32-NEXT:    subu $3, $1, $6
-; MIPS32-NEXT:    ori $8, $zero, 0
-; MIPS32-NEXT:    sltu $1, $6, $1
-; MIPS32-NEXT:    sltiu $9, $6, 1
-; MIPS32-NEXT:    sllv $10, $4, $6
-; MIPS32-NEXT:    srlv $3, $4, $3
-; MIPS32-NEXT:    sllv $6, $5, $6
-; MIPS32-NEXT:    or $3, $3, $6
-; MIPS32-NEXT:    sllv $2, $4, $2
-; MIPS32-NEXT:    andi $4, $1, 1
-; MIPS32-NEXT:    movn $8, $10, $4
+; MIPS32-NEXT:    subu $8, $9, $1
+; MIPS32-NEXT:    subu $4, $1, $9
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    sltu $6, $9, $1
+; MIPS32-NEXT:    sltiu $1, $9, 1
+; MIPS32-NEXT:    sllv $7, $3, $9
+; MIPS32-NEXT:    srlv $4, $3, $4
+; MIPS32-NEXT:    sllv $9, $5, $9
+; MIPS32-NEXT:    or $4, $4, $9
+; MIPS32-NEXT:    sllv $3, $3, $8
+; MIPS32-NEXT:    andi $8, $6, 1
+; MIPS32-NEXT:    movn $2, $7, $8
+; MIPS32-NEXT:    andi $6, $6, 1
+; MIPS32-NEXT:    movn $3, $4, $6
 ; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    movn $2, $3, $1
-; MIPS32-NEXT:    andi $1, $9, 1
-; MIPS32-NEXT:    movn $2, $5, $1
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    move $2, $8
-; MIPS32-NEXT:    lw $3, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    addiu $sp, $sp, 8
+; MIPS32-NEXT:    movn $3, $5, $1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -381,24 +377,30 @@ entry:
 define i64 @ashl_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: ashl_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    ori $1, $zero, 32
-; MIPS32-NEXT:    subu $2, $6, $1
-; MIPS32-NEXT:    subu $3, $1, $6
-; MIPS32-NEXT:    sltu $1, $6, $1
-; MIPS32-NEXT:    sltiu $8, $6, 1
-; MIPS32-NEXT:    srav $9, $5, $6
-; MIPS32-NEXT:    srlv $6, $4, $6
-; MIPS32-NEXT:    sllv $3, $5, $3
-; MIPS32-NEXT:    or $3, $6, $3
-; MIPS32-NEXT:    sra $6, $5, 31
-; MIPS32-NEXT:    srav $2, $5, $2
-; MIPS32-NEXT:    andi $5, $1, 1
-; MIPS32-NEXT:    movn $2, $3, $5
-; MIPS32-NEXT:    andi $3, $8, 1
-; MIPS32-NEXT:    movn $2, $4, $3
-; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    movn $6, $9, $1
+; MIPS32-NEXT:    addiu $sp, $sp, -8
+; MIPS32-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32-NEXT:    sw $4, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $2, $5
+; MIPS32-NEXT:    lw $5, 4($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    move $3, $6
+; MIPS32-NEXT:    ori $1, $zero, 32
+; MIPS32-NEXT:    subu $8, $3, $1
+; MIPS32-NEXT:    subu $7, $1, $3
+; MIPS32-NEXT:    sltu $4, $3, $1
+; MIPS32-NEXT:    sltiu $6, $3, 1
+; MIPS32-NEXT:    srav $1, $2, $3
+; MIPS32-NEXT:    srlv $3, $5, $3
+; MIPS32-NEXT:    sllv $7, $2, $7
+; MIPS32-NEXT:    or $7, $3, $7
+; MIPS32-NEXT:    sra $3, $2, 31
+; MIPS32-NEXT:    srav $2, $2, $8
+; MIPS32-NEXT:    andi $8, $4, 1
+; MIPS32-NEXT:    movn $2, $7, $8
+; MIPS32-NEXT:    andi $6, $6, 1
+; MIPS32-NEXT:    movn $2, $5, $6
+; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    movn $3, $1, $4
+; MIPS32-NEXT:    addiu $sp, $sp, 8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -409,24 +411,30 @@ entry:
 define i64 @lshr_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: lshr_i64:
 ; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    addiu $sp, $sp, -8
+; MIPS32-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32-NEXT:    sw $4, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $2, $5
+; MIPS32-NEXT:    lw $5, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    move $7, $6
 ; MIPS32-NEXT:    ori $1, $zero, 32
-; MIPS32-NEXT:    subu $2, $6, $1
-; MIPS32-NEXT:    subu $3, $1, $6
-; MIPS32-NEXT:    ori $8, $zero, 0
-; MIPS32-NEXT:    sltu $1, $6, $1
-; MIPS32-NEXT:    sltiu $9, $6, 1
-; MIPS32-NEXT:    srlv $10, $5, $6
-; MIPS32-NEXT:    srlv $6, $4, $6
-; MIPS32-NEXT:    sllv $3, $5, $3
-; MIPS32-NEXT:    or $3, $6, $3
-; MIPS32-NEXT:    srlv $2, $5, $2
-; MIPS32-NEXT:    andi $5, $1, 1
-; MIPS32-NEXT:    movn $2, $3, $5
-; MIPS32-NEXT:    andi $3, $9, 1
-; MIPS32-NEXT:    movn $2, $4, $3
-; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    movn $8, $10, $1
-; MIPS32-NEXT:    move $3, $8
+; MIPS32-NEXT:    subu $8, $7, $1
+; MIPS32-NEXT:    subu $9, $1, $7
+; MIPS32-NEXT:    ori $3, $zero, 0
+; MIPS32-NEXT:    sltu $4, $7, $1
+; MIPS32-NEXT:    sltiu $6, $7, 1
+; MIPS32-NEXT:    srlv $1, $2, $7
+; MIPS32-NEXT:    srlv $7, $5, $7
+; MIPS32-NEXT:    sllv $9, $2, $9
+; MIPS32-NEXT:    or $7, $7, $9
+; MIPS32-NEXT:    srlv $2, $2, $8
+; MIPS32-NEXT:    andi $8, $4, 1
+; MIPS32-NEXT:    movn $2, $7, $8
+; MIPS32-NEXT:    andi $6, $6, 1
+; MIPS32-NEXT:    movn $2, $5, $6
+; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    movn $3, $1, $4
+; MIPS32-NEXT:    addiu $sp, $sp, 8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
index 4600142cb9be..6ce601a94ef7 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll
@@ -30,21 +30,21 @@ define i32 @Conditional_branch(i1 %cond, i32 %a, i32 %b) {
 ; MIPS32:       # %bb.0:
 ; MIPS32-NEXT:    addiu $sp, $sp, -8
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32-NEXT:    sw $5, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $6, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    andi $1, $4, 1
-; MIPS32-NEXT:    sw $5, 4($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $6, 0($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    bnez $1, $BB1_2
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.1:
 ; MIPS32-NEXT:    j $BB1_3
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_2: # %if.then
-; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 0($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_3: # %if.else
-; MIPS32-NEXT:    lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
index 568558538cdb..9bb803f4cfd3 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll
@@ -6,19 +6,19 @@ define i32 @indirectbr(i8 *%addr) {
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -8
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 8
-; MIPS32-NEXT:    ori $2, $zero, 1
-; MIPS32-NEXT:    ori $1, $zero, 0
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $1, $zero, 1
 ; MIPS32-NEXT:    sw $1, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $1, $zero, 0
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    jr $4
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_1: # %L1
-; MIPS32-NEXT:    lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_2: # %L2
-; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 0($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
index 3b371cca6fe9..b168e13b7f55 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll
@@ -6,9 +6,9 @@ declare i32 @llvm.bswap.i32(i32)
 define i32 @bswap_i32(i32 %x) {
 ; MIPS32-LABEL: bswap_i32:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $4, 24
-; MIPS32-NEXT:    srl $2, $4, 24
-; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    sll $2, $4, 24
+; MIPS32-NEXT:    srl $1, $4, 24
+; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    andi $2, $4, 65280
 ; MIPS32-NEXT:    sll $2, $2, 8
 ; MIPS32-NEXT:    or $1, $1, $2
@@ -33,18 +33,18 @@ declare i64 @llvm.bswap.i64(i64)
 define i64 @bswap_i64(i64 %x) {
 ; MIPS32-LABEL: bswap_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sll $1, $5, 24
-; MIPS32-NEXT:    srl $2, $5, 24
-; MIPS32-NEXT:    or $1, $2, $1
+; MIPS32-NEXT:    sll $2, $5, 24
+; MIPS32-NEXT:    srl $1, $5, 24
+; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    andi $2, $5, 65280
 ; MIPS32-NEXT:    sll $2, $2, 8
 ; MIPS32-NEXT:    or $1, $1, $2
 ; MIPS32-NEXT:    srl $2, $5, 8
 ; MIPS32-NEXT:    andi $2, $2, 65280
 ; MIPS32-NEXT:    or $2, $1, $2
-; MIPS32-NEXT:    sll $1, $4, 24
-; MIPS32-NEXT:    srl $3, $4, 24
-; MIPS32-NEXT:    or $1, $3, $1
+; MIPS32-NEXT:    sll $3, $4, 24
+; MIPS32-NEXT:    srl $1, $4, 24
+; MIPS32-NEXT:    or $1, $1, $3
 ; MIPS32-NEXT:    andi $3, $4, 65280
 ; MIPS32-NEXT:    sll $3, $3, 8
 ; MIPS32-NEXT:    or $1, $1, $3

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
index f7952e446236..0312f49fa6ee 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll
@@ -29,11 +29,10 @@ define i32 @call_global(i32 %a0, i32 %a1, i32 %x, i32 %y) {
 ; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
 ; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
-; MIPS32_PIC-NEXT:    addu $1, $2, $25
-; MIPS32_PIC-NEXT:    lw $25, %call16(f)($1)
+; MIPS32_PIC-NEXT:    addu $gp, $2, $25
 ; MIPS32_PIC-NEXT:    move $4, $6
 ; MIPS32_PIC-NEXT:    move $5, $7
-; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    lw $25, %call16(f)($gp)
 ; MIPS32_PIC-NEXT:    jalr $25
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:    addu $2, $2, $2
@@ -89,12 +88,11 @@ define i32 @call_global_with_local_linkage(i32 %a0, i32 %a1, i32 %x, i32 %y) {
 ; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
 ; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
-; MIPS32_PIC-NEXT:    addu $1, $2, $25
-; MIPS32_PIC-NEXT:    lw $2, %got(f_with_local_linkage)($1)
-; MIPS32_PIC-NEXT:    addiu $25, $2, %lo(f_with_local_linkage)
+; MIPS32_PIC-NEXT:    addu $gp, $2, $25
 ; MIPS32_PIC-NEXT:    move $4, $6
 ; MIPS32_PIC-NEXT:    move $5, $7
-; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    lw $1, %got(f_with_local_linkage)($gp)
+; MIPS32_PIC-NEXT:    addiu $25, $1, %lo(f_with_local_linkage)
 ; MIPS32_PIC-NEXT:    jalr $25
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:    addu $2, $2, $2
@@ -115,10 +113,9 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 24
 ; MIPS32-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    .cfi_offset 31, -4
-; MIPS32-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $25, $4
 ; MIPS32-NEXT:    move $4, $5
 ; MIPS32-NEXT:    move $5, $6
-; MIPS32-NEXT:    lw $25, 16($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    jalr $25
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
@@ -132,10 +129,9 @@ define i32 @call_reg(i32 (i32, i32)* %f_ptr, i32 %x, i32 %y) {
 ; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
 ; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
-; MIPS32_PIC-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    move $25, $4
 ; MIPS32_PIC-NEXT:    move $4, $5
 ; MIPS32_PIC-NEXT:    move $5, $6
-; MIPS32_PIC-NEXT:    lw $25, 16($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    jalr $25
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
index 4030cfbf57e6..65fce9d4f5d5 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll
@@ -17,14 +17,14 @@ declare i32 @llvm.ctlz.i32(i32, i1 immarg)
 define i64 @ctlz_i64(i64 %a) {
 ; MIPS32-LABEL: ctlz_i64:
 ; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    move $1, $4
 ; MIPS32-NEXT:    ori $3, $zero, 0
-; MIPS32-NEXT:    sltiu $1, $5, 1
-; MIPS32-NEXT:    clz $2, $4
-; MIPS32-NEXT:    addiu $2, $2, 32
-; MIPS32-NEXT:    clz $4, $5
-; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    movn $4, $2, $1
-; MIPS32-NEXT:    move $2, $4
+; MIPS32-NEXT:    sltiu $4, $5, 1
+; MIPS32-NEXT:    clz $1, $1
+; MIPS32-NEXT:    addiu $1, $1, 32
+; MIPS32-NEXT:    clz $2, $5
+; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    movn $2, $1, $4
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
index 5d7a2f23eac1..7ac9c4332fed 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll
@@ -8,15 +8,15 @@ define i32 @ctpop_i32(i32 %a) {
 ; MIPS32-NEXT:    lui $2, 21845
 ; MIPS32-NEXT:    ori $2, $2, 21845
 ; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    subu $1, $4, $1
-; MIPS32-NEXT:    srl $2, $1, 2
+; MIPS32-NEXT:    subu $2, $4, $1
+; MIPS32-NEXT:    srl $1, $2, 2
 ; MIPS32-NEXT:    lui $3, 13107
 ; MIPS32-NEXT:    ori $3, $3, 13107
-; MIPS32-NEXT:    and $2, $2, $3
 ; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    addu $1, $2, $1
-; MIPS32-NEXT:    srl $2, $1, 4
-; MIPS32-NEXT:    addu $1, $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    addu $2, $1, $2
+; MIPS32-NEXT:    srl $1, $2, 4
+; MIPS32-NEXT:    addu $1, $1, $2
 ; MIPS32-NEXT:    lui $2, 3855
 ; MIPS32-NEXT:    ori $2, $2, 3855
 ; MIPS32-NEXT:    and $1, $1, $2
@@ -38,37 +38,37 @@ define i64 @ctpop_i64(i64 %a) {
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    srl $1, $4, 1
 ; MIPS32-NEXT:    lui $2, 21845
-; MIPS32-NEXT:    ori $2, $2, 21845
-; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    subu $1, $4, $1
-; MIPS32-NEXT:    srl $3, $1, 2
-; MIPS32-NEXT:    lui $4, 13107
-; MIPS32-NEXT:    ori $4, $4, 13107
-; MIPS32-NEXT:    and $3, $3, $4
+; MIPS32-NEXT:    ori $7, $2, 21845
+; MIPS32-NEXT:    and $1, $1, $7
+; MIPS32-NEXT:    subu $2, $4, $1
+; MIPS32-NEXT:    srl $1, $2, 2
+; MIPS32-NEXT:    lui $3, 13107
+; MIPS32-NEXT:    ori $6, $3, 13107
+; MIPS32-NEXT:    and $1, $1, $6
+; MIPS32-NEXT:    and $2, $2, $6
+; MIPS32-NEXT:    addu $2, $1, $2
+; MIPS32-NEXT:    srl $1, $2, 4
+; MIPS32-NEXT:    addu $1, $1, $2
+; MIPS32-NEXT:    lui $2, 3855
+; MIPS32-NEXT:    ori $4, $2, 3855
 ; MIPS32-NEXT:    and $1, $1, $4
-; MIPS32-NEXT:    addu $1, $3, $1
-; MIPS32-NEXT:    srl $3, $1, 4
-; MIPS32-NEXT:    addu $1, $3, $1
-; MIPS32-NEXT:    lui $3, 3855
-; MIPS32-NEXT:    ori $3, $3, 3855
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    lui $6, 257
-; MIPS32-NEXT:    ori $6, $6, 257
-; MIPS32-NEXT:    mul $1, $1, $6
+; MIPS32-NEXT:    lui $2, 257
+; MIPS32-NEXT:    ori $3, $2, 257
+; MIPS32-NEXT:    mul $1, $1, $3
+; MIPS32-NEXT:    srl $2, $1, 24
+; MIPS32-NEXT:    srl $1, $5, 1
+; MIPS32-NEXT:    and $1, $1, $7
+; MIPS32-NEXT:    subu $5, $5, $1
+; MIPS32-NEXT:    srl $1, $5, 2
+; MIPS32-NEXT:    and $1, $1, $6
+; MIPS32-NEXT:    and $5, $5, $6
+; MIPS32-NEXT:    addu $5, $1, $5
+; MIPS32-NEXT:    srl $1, $5, 4
+; MIPS32-NEXT:    addu $1, $1, $5
+; MIPS32-NEXT:    and $1, $1, $4
+; MIPS32-NEXT:    mul $1, $1, $3
 ; MIPS32-NEXT:    srl $1, $1, 24
-; MIPS32-NEXT:    srl $7, $5, 1
-; MIPS32-NEXT:    and $2, $7, $2
-; MIPS32-NEXT:    subu $2, $5, $2
-; MIPS32-NEXT:    srl $5, $2, 2
-; MIPS32-NEXT:    and $5, $5, $4
-; MIPS32-NEXT:    and $2, $2, $4
-; MIPS32-NEXT:    addu $2, $5, $2
-; MIPS32-NEXT:    srl $4, $2, 4
-; MIPS32-NEXT:    addu $2, $4, $2
-; MIPS32-NEXT:    and $2, $2, $3
-; MIPS32-NEXT:    mul $2, $2, $6
-; MIPS32-NEXT:    srl $2, $2, 24
-; MIPS32-NEXT:    addu $2, $2, $1
+; MIPS32-NEXT:    addu $2, $1, $2
 ; MIPS32-NEXT:    ori $3, $zero, 0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
index 3ea5329da548..44a2e619f715 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
@@ -6,10 +6,10 @@ define i32 @cttz_i32(i32 %a) {
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    not $1, $4
 ; MIPS32-NEXT:    addiu $2, $4, -1
-; MIPS32-NEXT:    and $1, $1, $2
-; MIPS32-NEXT:    ori $2, $zero, 32
-; MIPS32-NEXT:    clz $1, $1
-; MIPS32-NEXT:    subu $2, $2, $1
+; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    ori $1, $zero, 32
+; MIPS32-NEXT:    clz $2, $2
+; MIPS32-NEXT:    subu $2, $1, $2
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -21,23 +21,23 @@ declare i32 @llvm.cttz.i32(i32, i1 immarg)
 define i64 @cttz_i64(i64  %a) {
 ; MIPS32-LABEL: cttz_i64:
 ; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    move $6, $4
 ; MIPS32-NEXT:    ori $3, $zero, 0
-; MIPS32-NEXT:    sltiu $1, $4, 1
-; MIPS32-NEXT:    not $2, $5
-; MIPS32-NEXT:    addiu $5, $5, -1
-; MIPS32-NEXT:    and $2, $2, $5
-; MIPS32-NEXT:    ori $5, $zero, 32
-; MIPS32-NEXT:    clz $2, $2
-; MIPS32-NEXT:    subu $2, $5, $2
-; MIPS32-NEXT:    addiu $2, $2, 32
-; MIPS32-NEXT:    not $6, $4
-; MIPS32-NEXT:    addiu $4, $4, -1
-; MIPS32-NEXT:    and $4, $6, $4
-; MIPS32-NEXT:    clz $4, $4
-; MIPS32-NEXT:    subu $4, $5, $4
-; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    movn $4, $2, $1
-; MIPS32-NEXT:    move $2, $4
+; MIPS32-NEXT:    sltiu $4, $6, 1
+; MIPS32-NEXT:    not $1, $5
+; MIPS32-NEXT:    addiu $2, $5, -1
+; MIPS32-NEXT:    and $1, $1, $2
+; MIPS32-NEXT:    ori $2, $zero, 32
+; MIPS32-NEXT:    clz $1, $1
+; MIPS32-NEXT:    subu $1, $2, $1
+; MIPS32-NEXT:    addiu $1, $1, 32
+; MIPS32-NEXT:    not $5, $6
+; MIPS32-NEXT:    addiu $6, $6, -1
+; MIPS32-NEXT:    and $5, $5, $6
+; MIPS32-NEXT:    clz $5, $5
+; MIPS32-NEXT:    subu $2, $2, $5
+; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    movn $2, $1, $4
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -53,10 +53,10 @@ define i32 @ffs_i32_expansion(i32 %a) {
 ; MIPS32-NEXT:    ori $1, $zero, 0
 ; MIPS32-NEXT:    not $2, $4
 ; MIPS32-NEXT:    addiu $3, $4, -1
-; MIPS32-NEXT:    and $2, $2, $3
-; MIPS32-NEXT:    ori $3, $zero, 32
-; MIPS32-NEXT:    clz $2, $2
-; MIPS32-NEXT:    subu $2, $3, $2
+; MIPS32-NEXT:    and $3, $2, $3
+; MIPS32-NEXT:    ori $2, $zero, 32
+; MIPS32-NEXT:    clz $3, $3
+; MIPS32-NEXT:    subu $2, $2, $3
 ; MIPS32-NEXT:    addiu $2, $2, 1
 ; MIPS32-NEXT:    sltiu $3, $4, 1
 ; MIPS32-NEXT:    andi $3, $3, 1
@@ -74,37 +74,35 @@ entry:
 define i64 @ffs_i64_expansion(i64 %a) {
 ; MIPS32-LABEL: ffs_i64_expansion:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    ori $1, $zero, 1
-; MIPS32-NEXT:    ori $2, $zero, 0
-; MIPS32-NEXT:    sltiu $3, $4, 1
-; MIPS32-NEXT:    not $6, $5
-; MIPS32-NEXT:    addiu $7, $5, -1
-; MIPS32-NEXT:    and $6, $6, $7
-; MIPS32-NEXT:    ori $7, $zero, 32
+; MIPS32-NEXT:    ori $3, $zero, 1
+; MIPS32-NEXT:    ori $1, $zero, 0
+; MIPS32-NEXT:    sltiu $7, $4, 1
+; MIPS32-NEXT:    not $2, $5
+; MIPS32-NEXT:    addiu $6, $5, -1
+; MIPS32-NEXT:    and $6, $2, $6
+; MIPS32-NEXT:    ori $2, $zero, 32
 ; MIPS32-NEXT:    clz $6, $6
-; MIPS32-NEXT:    subu $6, $7, $6
+; MIPS32-NEXT:    subu $6, $2, $6
 ; MIPS32-NEXT:    addiu $6, $6, 32
 ; MIPS32-NEXT:    not $8, $4
 ; MIPS32-NEXT:    addiu $9, $4, -1
 ; MIPS32-NEXT:    and $8, $8, $9
 ; MIPS32-NEXT:    clz $8, $8
-; MIPS32-NEXT:    subu $7, $7, $8
-; MIPS32-NEXT:    andi $3, $3, 1
-; MIPS32-NEXT:    movn $7, $6, $3
-; MIPS32-NEXT:    addiu $3, $7, 1
-; MIPS32-NEXT:    sltu $1, $3, $1
-; MIPS32-NEXT:    addiu $6, $2, 0
-; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    addu $1, $6, $1
+; MIPS32-NEXT:    subu $2, $2, $8
+; MIPS32-NEXT:    andi $7, $7, 1
+; MIPS32-NEXT:    movn $2, $6, $7
+; MIPS32-NEXT:    addiu $2, $2, 1
+; MIPS32-NEXT:    sltu $6, $2, $3
+; MIPS32-NEXT:    addiu $3, $1, 0
+; MIPS32-NEXT:    andi $6, $6, 1
+; MIPS32-NEXT:    addu $3, $3, $6
 ; MIPS32-NEXT:    xori $4, $4, 0
 ; MIPS32-NEXT:    xori $5, $5, 0
 ; MIPS32-NEXT:    or $4, $4, $5
 ; MIPS32-NEXT:    sltiu $4, $4, 1
 ; MIPS32-NEXT:    andi $4, $4, 1
-; MIPS32-NEXT:    movn $3, $2, $4
-; MIPS32-NEXT:    movn $1, $2, $4
-; MIPS32-NEXT:    move $2, $3
-; MIPS32-NEXT:    move $3, $1
+; MIPS32-NEXT:    movn $2, $1, $4
+; MIPS32-NEXT:    movn $3, $1, $4
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
index fcc2d6ef0a93..294bc71443ea 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll
@@ -15,35 +15,32 @@ define void @Print_c_N_times(i8 %c, i32 %N) {
 ; MIPS32-NEXT:    .cfi_offset 30, -8
 ; MIPS32-NEXT:    move $fp, $sp
 ; MIPS32-NEXT:    .cfi_def_cfa_register 30
-; MIPS32-NEXT:    ori $1, $zero, 1
-; MIPS32-NEXT:    ori $2, $zero, 0
-; MIPS32-NEXT:    addiu $3, $5, 1
-; MIPS32-NEXT:    mul $1, $3, $1
+; MIPS32-NEXT:    sw $4, 8($fp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $6, $5
+; MIPS32-NEXT:    lw $5, 8($fp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $6, 12($fp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $2, $zero, 1
+; MIPS32-NEXT:    ori $1, $zero, 0
+; MIPS32-NEXT:    sw $1, 16($fp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $6, 1
+; MIPS32-NEXT:    mul $1, $1, $2
 ; MIPS32-NEXT:    addiu $1, $1, 7
-; MIPS32-NEXT:    addiu $3, $zero, 65528
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    move $3, $sp
-; MIPS32-NEXT:    subu $1, $3, $1
-; MIPS32-NEXT:    move $sp, $1
-; MIPS32-NEXT:    addiu $sp, $sp, -16
+; MIPS32-NEXT:    addiu $2, $zero, 65528
+; MIPS32-NEXT:    and $2, $1, $2
+; MIPS32-NEXT:    move $1, $sp
+; MIPS32-NEXT:    subu $4, $1, $2
 ; MIPS32-NEXT:    sw $4, 20($fp) # 4-byte Folded Spill
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    lw $3, 20($fp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sw $5, 16($fp) # 4-byte Folded Spill
-; MIPS32-NEXT:    move $5, $3
-; MIPS32-NEXT:    lw $6, 16($fp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sw $2, 12($fp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $1, 8($fp) # 4-byte Folded Spill
+; MIPS32-NEXT:    move $sp, $4
+; MIPS32-NEXT:    addiu $sp, $sp, -16
 ; MIPS32-NEXT:    jal memset
 ; MIPS32-NEXT:    nop
+; MIPS32-NEXT:    lw $5, 12($fp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 16($fp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $4, 20($fp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 16
-; MIPS32-NEXT:    lw $1, 8($fp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 16($fp) # 4-byte Folded Reload
-; MIPS32-NEXT:    addu $3, $1, $2
-; MIPS32-NEXT:    lw $4, 12($fp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sb $4, 0($3)
+; MIPS32-NEXT:    addu $2, $4, $5
+; MIPS32-NEXT:    sb $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, -16
-; MIPS32-NEXT:    move $4, $1
 ; MIPS32-NEXT:    jal puts
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:    addiu $sp, $sp, 16

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
index 58d5c8a160a6..bfff4e72d0ab 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll
@@ -27,10 +27,9 @@ entry:
 define i1 @uno_s(float %x, float %y) {
 ; MIPS32-LABEL: uno_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.un.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -40,10 +39,9 @@ entry:
 define i1 @ord_s(float %x, float %y) {
 ; MIPS32-LABEL: ord_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.un.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -55,10 +53,9 @@ entry:
 define i1 @oeq_s(float %x, float %y) {
 ; MIPS32-LABEL: oeq_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.eq.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -68,10 +65,9 @@ entry:
 define i1 @une_s(float %x, float %y) {
 ; MIPS32-LABEL: une_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.eq.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -83,10 +79,9 @@ entry:
 define i1 @ueq_s(float %x, float %y) {
 ; MIPS32-LABEL: ueq_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ueq.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -96,10 +91,9 @@ entry:
 define i1 @one_s(float %x, float %y) {
 ; MIPS32-LABEL: one_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ueq.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -111,10 +105,9 @@ entry:
 define i1 @olt_s(float %x, float %y) {
 ; MIPS32-LABEL: olt_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.olt.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -124,10 +117,9 @@ entry:
 define i1 @uge_s(float %x, float %y) {
 ; MIPS32-LABEL: uge_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.olt.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -139,10 +131,9 @@ entry:
 define i1 @ult_s(float %x, float %y) {
 ; MIPS32-LABEL: ult_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ult.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -152,10 +143,9 @@ entry:
 define i1 @oge_s(float %x, float %y) {
 ; MIPS32-LABEL: oge_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ult.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -167,10 +157,9 @@ entry:
 define i1 @ole_s(float %x, float %y) {
 ; MIPS32-LABEL: ole_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ole.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -180,10 +169,9 @@ entry:
 define i1 @ugt_s(float %x, float %y) {
 ; MIPS32-LABEL: ugt_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ole.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -195,10 +183,9 @@ entry:
 define i1 @ule_s(float %x, float %y) {
 ; MIPS32-LABEL: ule_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ule.s $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -208,10 +195,9 @@ entry:
 define i1 @ogt_s(float %x, float %y) {
 ; MIPS32-LABEL: ogt_s:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ule.s $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -245,10 +231,9 @@ entry:
 define i1 @uno_d(double %x, double %y) {
 ; MIPS32-LABEL: uno_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.un.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -258,10 +243,9 @@ entry:
 define i1 @ord_d(double %x, double %y) {
 ; MIPS32-LABEL: ord_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.un.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -273,10 +257,9 @@ entry:
 define i1 @oeq_d(double %x, double %y) {
 ; MIPS32-LABEL: oeq_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.eq.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -286,10 +269,9 @@ entry:
 define i1 @une_d(double %x, double %y) {
 ; MIPS32-LABEL: une_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.eq.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -301,10 +283,9 @@ entry:
 define i1 @ueq_d(double %x, double %y) {
 ; MIPS32-LABEL: ueq_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ueq.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -314,10 +295,9 @@ entry:
 define i1 @one_d(double %x, double %y) {
 ; MIPS32-LABEL: one_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ueq.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -329,10 +309,9 @@ entry:
 define i1 @olt_d(double %x, double %y) {
 ; MIPS32-LABEL: olt_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.olt.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -342,10 +321,9 @@ entry:
 define i1 @uge_d(double %x, double %y) {
 ; MIPS32-LABEL: uge_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.olt.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -357,10 +335,9 @@ entry:
 define i1 @ult_d(double %x, double %y) {
 ; MIPS32-LABEL: ult_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ult.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -370,10 +347,9 @@ entry:
 define i1 @oge_d(double %x, double %y) {
 ; MIPS32-LABEL: oge_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ult.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -385,10 +361,9 @@ entry:
 define i1 @ole_d(double %x, double %y) {
 ; MIPS32-LABEL: ole_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ole.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -398,10 +373,9 @@ entry:
 define i1 @ugt_d(double %x, double %y) {
 ; MIPS32-LABEL: ugt_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ole.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -413,10 +387,9 @@ entry:
 define i1 @ule_d(double %x, double %y) {
 ; MIPS32-LABEL: ule_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ule.d $f12, $f14
-; MIPS32-NEXT:    movf $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movf $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -426,10 +399,9 @@ entry:
 define i1 @ogt_d(double %x, double %y) {
 ; MIPS32-LABEL: ogt_d:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    addiu $1, $zero, 1
+; MIPS32-NEXT:    addiu $2, $zero, 1
 ; MIPS32-NEXT:    c.ule.d $f12, $f14
-; MIPS32-NEXT:    movt $1, $zero, $fcc0
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    movt $2, $zero, $fcc0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
index f4ca9e5b5371..85feeda82e25 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll
@@ -18,22 +18,22 @@ define double @e_double_precision() {
 ; FP32-LABEL: e_double_precision:
 ; FP32:       # %bb.0: # %entry
 ; FP32-NEXT:    lui $1, 16389
-; FP32-NEXT:    ori $1, $1, 48906
-; FP32-NEXT:    lui $2, 35604
-; FP32-NEXT:    ori $2, $2, 22377
-; FP32-NEXT:    mtc1 $2, $f0
-; FP32-NEXT:    mtc1 $1, $f1
+; FP32-NEXT:    ori $2, $1, 48906
+; FP32-NEXT:    lui $1, 35604
+; FP32-NEXT:    ori $1, $1, 22377
+; FP32-NEXT:    mtc1 $1, $f0
+; FP32-NEXT:    mtc1 $2, $f1
 ; FP32-NEXT:    jr $ra
 ; FP32-NEXT:    nop
 ;
 ; FP64-LABEL: e_double_precision:
 ; FP64:       # %bb.0: # %entry
 ; FP64-NEXT:    lui $1, 16389
-; FP64-NEXT:    ori $1, $1, 48906
-; FP64-NEXT:    lui $2, 35604
-; FP64-NEXT:    ori $2, $2, 22377
-; FP64-NEXT:    mtc1 $2, $f0
-; FP64-NEXT:    mthc1 $1, $f0
+; FP64-NEXT:    ori $2, $1, 48906
+; FP64-NEXT:    lui $1, 35604
+; FP64-NEXT:    ori $1, $1, 22377
+; FP64-NEXT:    mtc1 $1, $f0
+; FP64-NEXT:    mthc1 $2, $f0
 ; FP64-NEXT:    jr $ra
 ; FP64-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
index a98c6eb9fd6c..e9cc0b933f71 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll
@@ -164,20 +164,20 @@ define zeroext i16 @f32tou16(float %a) {
 ; MIPS32-LABEL: f32tou16:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    trunc.w.s $f0, $f12
-; MIPS32-NEXT:    mfc1 $1, $f0
-; MIPS32-NEXT:    lui $2, 20224
-; MIPS32-NEXT:    mtc1 $2, $f0
+; MIPS32-NEXT:    mfc1 $2, $f0
+; MIPS32-NEXT:    lui $1, 20224
+; MIPS32-NEXT:    mtc1 $1, $f0
 ; MIPS32-NEXT:    sub.s $f1, $f12, $f0
 ; MIPS32-NEXT:    trunc.w.s $f1, $f1
-; MIPS32-NEXT:    mfc1 $2, $f1
+; MIPS32-NEXT:    mfc1 $1, $f1
 ; MIPS32-NEXT:    lui $3, 32768
-; MIPS32-NEXT:    xor $2, $2, $3
+; MIPS32-NEXT:    xor $1, $1, $3
 ; MIPS32-NEXT:    addiu $3, $zero, 1
 ; MIPS32-NEXT:    c.ult.s $f12, $f0
 ; MIPS32-NEXT:    movf $3, $zero, $fcc0
 ; MIPS32-NEXT:    andi $3, $3, 1
-; MIPS32-NEXT:    movn $2, $1, $3
-; MIPS32-NEXT:    andi $2, $2, 65535
+; MIPS32-NEXT:    movn $1, $2, $3
+; MIPS32-NEXT:    andi $2, $1, 65535
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -189,20 +189,20 @@ define zeroext i8 @f32tou8(float %a) {
 ; MIPS32-LABEL: f32tou8:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    trunc.w.s $f0, $f12
-; MIPS32-NEXT:    mfc1 $1, $f0
-; MIPS32-NEXT:    lui $2, 20224
-; MIPS32-NEXT:    mtc1 $2, $f0
+; MIPS32-NEXT:    mfc1 $2, $f0
+; MIPS32-NEXT:    lui $1, 20224
+; MIPS32-NEXT:    mtc1 $1, $f0
 ; MIPS32-NEXT:    sub.s $f1, $f12, $f0
 ; MIPS32-NEXT:    trunc.w.s $f1, $f1
-; MIPS32-NEXT:    mfc1 $2, $f1
+; MIPS32-NEXT:    mfc1 $1, $f1
 ; MIPS32-NEXT:    lui $3, 32768
-; MIPS32-NEXT:    xor $2, $2, $3
+; MIPS32-NEXT:    xor $1, $1, $3
 ; MIPS32-NEXT:    addiu $3, $zero, 1
 ; MIPS32-NEXT:    c.ult.s $f12, $f0
 ; MIPS32-NEXT:    movf $3, $zero, $fcc0
 ; MIPS32-NEXT:    andi $3, $3, 1
-; MIPS32-NEXT:    movn $2, $1, $3
-; MIPS32-NEXT:    andi $2, $2, 255
+; MIPS32-NEXT:    movn $1, $2, $3
+; MIPS32-NEXT:    andi $2, $1, 255
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -233,10 +233,10 @@ define i32 @f64tou32(double %a) {
 ; FP32:       # %bb.0: # %entry
 ; FP32-NEXT:    trunc.w.d $f0, $f12
 ; FP32-NEXT:    mfc1 $1, $f0
-; FP32-NEXT:    lui $2, 16864
-; FP32-NEXT:    ori $3, $zero, 0
-; FP32-NEXT:    mtc1 $3, $f0
-; FP32-NEXT:    mtc1 $2, $f1
+; FP32-NEXT:    lui $3, 16864
+; FP32-NEXT:    ori $2, $zero, 0
+; FP32-NEXT:    mtc1 $2, $f0
+; FP32-NEXT:    mtc1 $3, $f1
 ; FP32-NEXT:    sub.d $f2, $f12, $f0
 ; FP32-NEXT:    trunc.w.d $f2, $f2
 ; FP32-NEXT:    mfc1 $2, $f2
@@ -254,10 +254,10 @@ define i32 @f64tou32(double %a) {
 ; FP64:       # %bb.0: # %entry
 ; FP64-NEXT:    trunc.w.d $f0, $f12
 ; FP64-NEXT:    mfc1 $1, $f0
-; FP64-NEXT:    lui $2, 16864
-; FP64-NEXT:    ori $3, $zero, 0
-; FP64-NEXT:    mtc1 $3, $f0
-; FP64-NEXT:    mthc1 $2, $f0
+; FP64-NEXT:    lui $3, 16864
+; FP64-NEXT:    ori $2, $zero, 0
+; FP64-NEXT:    mtc1 $2, $f0
+; FP64-NEXT:    mthc1 $3, $f0
 ; FP64-NEXT:    sub.d $f1, $f12, $f0
 ; FP64-NEXT:    trunc.w.d $f1, $f1
 ; FP64-NEXT:    mfc1 $2, $f1
@@ -279,44 +279,44 @@ define zeroext i16 @f64tou16(double %a) {
 ; FP32-LABEL: f64tou16:
 ; FP32:       # %bb.0: # %entry
 ; FP32-NEXT:    trunc.w.d $f0, $f12
-; FP32-NEXT:    mfc1 $1, $f0
-; FP32-NEXT:    lui $2, 16864
-; FP32-NEXT:    ori $3, $zero, 0
-; FP32-NEXT:    mtc1 $3, $f0
-; FP32-NEXT:    mtc1 $2, $f1
+; FP32-NEXT:    mfc1 $2, $f0
+; FP32-NEXT:    lui $3, 16864
+; FP32-NEXT:    ori $1, $zero, 0
+; FP32-NEXT:    mtc1 $1, $f0
+; FP32-NEXT:    mtc1 $3, $f1
 ; FP32-NEXT:    sub.d $f2, $f12, $f0
 ; FP32-NEXT:    trunc.w.d $f2, $f2
-; FP32-NEXT:    mfc1 $2, $f2
+; FP32-NEXT:    mfc1 $1, $f2
 ; FP32-NEXT:    lui $3, 32768
-; FP32-NEXT:    xor $2, $2, $3
+; FP32-NEXT:    xor $1, $1, $3
 ; FP32-NEXT:    addiu $3, $zero, 1
 ; FP32-NEXT:    c.ult.d $f12, $f0
 ; FP32-NEXT:    movf $3, $zero, $fcc0
 ; FP32-NEXT:    andi $3, $3, 1
-; FP32-NEXT:    movn $2, $1, $3
-; FP32-NEXT:    andi $2, $2, 65535
+; FP32-NEXT:    movn $1, $2, $3
+; FP32-NEXT:    andi $2, $1, 65535
 ; FP32-NEXT:    jr $ra
 ; FP32-NEXT:    nop
 ;
 ; FP64-LABEL: f64tou16:
 ; FP64:       # %bb.0: # %entry
 ; FP64-NEXT:    trunc.w.d $f0, $f12
-; FP64-NEXT:    mfc1 $1, $f0
-; FP64-NEXT:    lui $2, 16864
-; FP64-NEXT:    ori $3, $zero, 0
-; FP64-NEXT:    mtc1 $3, $f0
-; FP64-NEXT:    mthc1 $2, $f0
+; FP64-NEXT:    mfc1 $2, $f0
+; FP64-NEXT:    lui $3, 16864
+; FP64-NEXT:    ori $1, $zero, 0
+; FP64-NEXT:    mtc1 $1, $f0
+; FP64-NEXT:    mthc1 $3, $f0
 ; FP64-NEXT:    sub.d $f1, $f12, $f0
 ; FP64-NEXT:    trunc.w.d $f1, $f1
-; FP64-NEXT:    mfc1 $2, $f1
+; FP64-NEXT:    mfc1 $1, $f1
 ; FP64-NEXT:    lui $3, 32768
-; FP64-NEXT:    xor $2, $2, $3
+; FP64-NEXT:    xor $1, $1, $3
 ; FP64-NEXT:    addiu $3, $zero, 1
 ; FP64-NEXT:    c.ult.d $f12, $f0
 ; FP64-NEXT:    movf $3, $zero, $fcc0
 ; FP64-NEXT:    andi $3, $3, 1
-; FP64-NEXT:    movn $2, $1, $3
-; FP64-NEXT:    andi $2, $2, 65535
+; FP64-NEXT:    movn $1, $2, $3
+; FP64-NEXT:    andi $2, $1, 65535
 ; FP64-NEXT:    jr $ra
 ; FP64-NEXT:    nop
 entry:
@@ -328,44 +328,44 @@ define zeroext i8 @f64tou8(double %a) {
 ; FP32-LABEL: f64tou8:
 ; FP32:       # %bb.0: # %entry
 ; FP32-NEXT:    trunc.w.d $f0, $f12
-; FP32-NEXT:    mfc1 $1, $f0
-; FP32-NEXT:    lui $2, 16864
-; FP32-NEXT:    ori $3, $zero, 0
-; FP32-NEXT:    mtc1 $3, $f0
-; FP32-NEXT:    mtc1 $2, $f1
+; FP32-NEXT:    mfc1 $2, $f0
+; FP32-NEXT:    lui $3, 16864
+; FP32-NEXT:    ori $1, $zero, 0
+; FP32-NEXT:    mtc1 $1, $f0
+; FP32-NEXT:    mtc1 $3, $f1
 ; FP32-NEXT:    sub.d $f2, $f12, $f0
 ; FP32-NEXT:    trunc.w.d $f2, $f2
-; FP32-NEXT:    mfc1 $2, $f2
+; FP32-NEXT:    mfc1 $1, $f2
 ; FP32-NEXT:    lui $3, 32768
-; FP32-NEXT:    xor $2, $2, $3
+; FP32-NEXT:    xor $1, $1, $3
 ; FP32-NEXT:    addiu $3, $zero, 1
 ; FP32-NEXT:    c.ult.d $f12, $f0
 ; FP32-NEXT:    movf $3, $zero, $fcc0
 ; FP32-NEXT:    andi $3, $3, 1
-; FP32-NEXT:    movn $2, $1, $3
-; FP32-NEXT:    andi $2, $2, 255
+; FP32-NEXT:    movn $1, $2, $3
+; FP32-NEXT:    andi $2, $1, 255
 ; FP32-NEXT:    jr $ra
 ; FP32-NEXT:    nop
 ;
 ; FP64-LABEL: f64tou8:
 ; FP64:       # %bb.0: # %entry
 ; FP64-NEXT:    trunc.w.d $f0, $f12
-; FP64-NEXT:    mfc1 $1, $f0
-; FP64-NEXT:    lui $2, 16864
-; FP64-NEXT:    ori $3, $zero, 0
-; FP64-NEXT:    mtc1 $3, $f0
-; FP64-NEXT:    mthc1 $2, $f0
+; FP64-NEXT:    mfc1 $2, $f0
+; FP64-NEXT:    lui $3, 16864
+; FP64-NEXT:    ori $1, $zero, 0
+; FP64-NEXT:    mtc1 $1, $f0
+; FP64-NEXT:    mthc1 $3, $f0
 ; FP64-NEXT:    sub.d $f1, $f12, $f0
 ; FP64-NEXT:    trunc.w.d $f1, $f1
-; FP64-NEXT:    mfc1 $2, $f1
+; FP64-NEXT:    mfc1 $1, $f1
 ; FP64-NEXT:    lui $3, 32768
-; FP64-NEXT:    xor $2, $2, $3
+; FP64-NEXT:    xor $1, $1, $3
 ; FP64-NEXT:    addiu $3, $zero, 1
 ; FP64-NEXT:    c.ult.d $f12, $f0
 ; FP64-NEXT:    movf $3, $zero, $fcc0
 ; FP64-NEXT:    andi $3, $3, 1
-; FP64-NEXT:    movn $2, $1, $3
-; FP64-NEXT:    andi $2, $2, 255
+; FP64-NEXT:    movn $1, $2, $3
+; FP64-NEXT:    andi $2, $1, 255
 ; FP64-NEXT:    jr $ra
 ; FP64-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
index 6e7e56aaa1ba..a23ab7c3ca8f 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll
@@ -14,12 +14,11 @@ define i32 @main() {
 ; MIPS32-NEXT:    addiu $4, $1, %lo($.str)
 ; MIPS32-NEXT:    lui $1, 18838
 ; MIPS32-NEXT:    ori $5, $1, 722
-; MIPS32-NEXT:    ori $2, $zero, 0
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $1, $zero, 0
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    jal printf
 ; MIPS32-NEXT:    nop
-; MIPS32-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 24
 ; MIPS32-NEXT:    jr $ra

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
index e293a565fc70..8e8ca91eb9de 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll
@@ -23,9 +23,8 @@ define i32 @call_global(i32 %a, i32 %b) {
 ; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
 ; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
-; MIPS32_PIC-NEXT:    addu $1, $2, $25
-; MIPS32_PIC-NEXT:    lw $25, %call16(f)($1)
-; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    addu $gp, $2, $25
+; MIPS32_PIC-NEXT:    lw $25, %call16(f)($gp)
 ; MIPS32_PIC-NEXT:    jalr $25
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload
@@ -46,10 +45,9 @@ define i32 @call_global_with_local_linkage(i32 %a, i32 %b) {
 ; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 24
 ; MIPS32_PIC-NEXT:    sw $ra, 20($sp) # 4-byte Folded Spill
 ; MIPS32_PIC-NEXT:    .cfi_offset 31, -4
-; MIPS32_PIC-NEXT:    addu $1, $2, $25
-; MIPS32_PIC-NEXT:    lw $2, %got(f_with_local_linkage)($1)
-; MIPS32_PIC-NEXT:    addiu $25, $2, %lo(f_with_local_linkage)
-; MIPS32_PIC-NEXT:    move $gp, $1
+; MIPS32_PIC-NEXT:    addu $gp, $2, $25
+; MIPS32_PIC-NEXT:    lw $1, %got(f_with_local_linkage)($gp)
+; MIPS32_PIC-NEXT:    addiu $25, $1, %lo(f_with_local_linkage)
 ; MIPS32_PIC-NEXT:    jalr $25
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:    lw $ra, 20($sp) # 4-byte Folded Reload

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
index 7eb952b47c56..a7e0d05544be 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll
@@ -188,13 +188,12 @@ entry:
 define i1 @sgt_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: sgt_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    slt $1, $7, $5
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $6, $4
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    slt $2, $7, $5
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $6, $4
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -206,14 +205,13 @@ define i1 @sge_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: sge_i64:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    slt $1, $5, $7
+; MIPS32-NEXT:    xori $2, $1, 1
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $4, $6
 ; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $4, $6
-; MIPS32-NEXT:    xori $3, $3, 1
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -224,13 +222,12 @@ entry:
 define i1 @slt_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: slt_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    slt $1, $5, $7
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $4, $6
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    slt $2, $5, $7
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $4, $6
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -242,14 +239,13 @@ define i1 @sle_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: sle_i64:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    slt $1, $7, $5
+; MIPS32-NEXT:    xori $2, $1, 1
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $6, $4
 ; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $6, $4
-; MIPS32-NEXT:    xori $3, $3, 1
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -260,13 +256,12 @@ entry:
 define i1 @ugt_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: ugt_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sltu $1, $7, $5
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $6, $4
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    sltu $2, $7, $5
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $6, $4
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -278,14 +273,13 @@ define i1 @uge_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: uge_i64:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    sltu $1, $5, $7
+; MIPS32-NEXT:    xori $2, $1, 1
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $4, $6
 ; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $4, $6
-; MIPS32-NEXT:    xori $3, $3, 1
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -296,13 +290,12 @@ entry:
 define i1 @ult_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: ult_i64:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    sltu $1, $5, $7
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $4, $6
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    sltu $2, $5, $7
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $4, $6
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:
@@ -314,14 +307,13 @@ define i1 @ule_i64(i64 %a, i64 %b) {
 ; MIPS32-LABEL: ule_i64:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    sltu $1, $7, $5
+; MIPS32-NEXT:    xori $2, $1, 1
+; MIPS32-NEXT:    xor $1, $5, $7
+; MIPS32-NEXT:    sltiu $3, $1, 1
+; MIPS32-NEXT:    sltu $1, $6, $4
 ; MIPS32-NEXT:    xori $1, $1, 1
-; MIPS32-NEXT:    xor $2, $5, $7
-; MIPS32-NEXT:    sltiu $2, $2, 1
-; MIPS32-NEXT:    sltu $3, $6, $4
-; MIPS32-NEXT:    xori $3, $3, 1
-; MIPS32-NEXT:    andi $2, $2, 1
-; MIPS32-NEXT:    movn $1, $3, $2
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 entry:

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
index dcd6c76a8b2a..804a14853bed 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll
@@ -7,35 +7,35 @@ define i32 @mod4_0_to_11(i32 %a) {
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -32
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32-NEXT:    sw $4, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    ori $1, $zero, 7
 ; MIPS32-NEXT:    ori $2, $zero, 3
-; MIPS32-NEXT:    ori $3, $zero, 2
-; MIPS32-NEXT:    ori $5, $zero, 1
-; MIPS32-NEXT:    ori $6, $zero, 0
-; MIPS32-NEXT:    addiu $7, $zero, 65535
-; MIPS32-NEXT:    ori $8, $zero, 0
-; MIPS32-NEXT:    subu $8, $4, $8
-; MIPS32-NEXT:    sltu $1, $1, $8
-; MIPS32-NEXT:    andi $1, $1, 1
-; MIPS32-NEXT:    sw $4, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $2, $zero, 2
+; MIPS32-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $2, $zero, 1
+; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $2, $zero, 65535
 ; MIPS32-NEXT:    sw $2, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $3, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $6, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $7, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $8, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    subu $2, $4, $2
+; MIPS32-NEXT:    sw $2, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sltu $1, $1, $2
+; MIPS32-NEXT:    andi $1, $1, 1
 ; MIPS32-NEXT:    bnez $1, $BB0_6
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_1: # %entry
+; MIPS32-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    lui $1, %hi($JTI0_0)
-; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sll $3, $2, 2
-; MIPS32-NEXT:    addu $1, $1, $3
+; MIPS32-NEXT:    sll $2, $2, 2
+; MIPS32-NEXT:    addu $1, $1, $2
 ; MIPS32-NEXT:    lw $1, %lo($JTI0_0)($1)
 ; MIPS32-NEXT:    jr $1
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_2: # %sw.bb
-; MIPS32-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
@@ -45,37 +45,37 @@ define i32 @mod4_0_to_11(i32 %a) {
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_4: # %sw.bb2
-; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_5: # %sw.bb3
-; MIPS32-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_6: # %sw.default
 ; MIPS32-NEXT:    .insn
 ; MIPS32-NEXT:  # %bb.7: # %sw.epilog
-; MIPS32-NEXT:    ori $1, $zero, 8
-; MIPS32-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    subu $1, $2, $1
-; MIPS32-NEXT:    lw $3, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sltu $4, $3, $1
-; MIPS32-NEXT:    andi $4, $4, 1
-; MIPS32-NEXT:    sw $1, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $4, $BB0_13
+; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    ori $3, $zero, 8
+; MIPS32-NEXT:    subu $2, $2, $3
+; MIPS32-NEXT:    sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sltu $1, $1, $2
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB0_13
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_8: # %sw.epilog
-; MIPS32-NEXT:    lui $1, %hi($JTI0_1)
 ; MIPS32-NEXT:    lw $2, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sll $3, $2, 2
-; MIPS32-NEXT:    addu $1, $1, $3
+; MIPS32-NEXT:    lui $1, %hi($JTI0_1)
+; MIPS32-NEXT:    sll $2, $2, 2
+; MIPS32-NEXT:    addu $1, $1, $2
 ; MIPS32-NEXT:    lw $1, %lo($JTI0_1)($1)
 ; MIPS32-NEXT:    jr $1
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_9: # %sw.bb4
-; MIPS32-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
@@ -85,35 +85,20 @@ define i32 @mod4_0_to_11(i32 %a) {
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_11: # %sw.bb6
-; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_12: # %sw.bb7
-; MIPS32-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_13: # %sw.default8
-; MIPS32-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    addiu $sp, $sp, 32
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
-; MIPS32:       $JTI0_0:
-; MIPS32-NEXT:   .4byte ($BB0_2)
-; MIPS32-NEXT:   .4byte ($BB0_3)
-; MIPS32-NEXT:   .4byte ($BB0_4)
-; MIPS32-NEXT:   .4byte ($BB0_5)
-; MIPS32-NEXT:   .4byte ($BB0_2)
-; MIPS32-NEXT:   .4byte ($BB0_3)
-; MIPS32-NEXT:   .4byte ($BB0_4)
-; MIPS32-NEXT:   .4byte ($BB0_5)
-; MIPS32-NEXT:   $JTI0_1:
-; MIPS32-NEXT:   .4byte ($BB0_9)
-; MIPS32-NEXT:   .4byte ($BB0_10)
-; MIPS32-NEXT:   .4byte ($BB0_11)
-; MIPS32-NEXT:   .4byte ($BB0_12)
-
 ;
 ; MIPS32_PIC-LABEL: mod4_0_to_11:
 ; MIPS32_PIC:       # %bb.0: # %entry
@@ -122,117 +107,104 @@ define i32 @mod4_0_to_11(i32 %a) {
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, -40
 ; MIPS32_PIC-NEXT:    .cfi_def_cfa_offset 40
 ; MIPS32_PIC-NEXT:    addu $1, $2, $25
-; MIPS32_PIC-NEXT:    ori $2, $zero, 7
-; MIPS32_PIC-NEXT:    ori $3, $zero, 3
-; MIPS32_PIC-NEXT:    ori $5, $zero, 2
-; MIPS32_PIC-NEXT:    ori $6, $zero, 1
-; MIPS32_PIC-NEXT:    ori $7, $zero, 0
-; MIPS32_PIC-NEXT:    addiu $8, $zero, 65535
-; MIPS32_PIC-NEXT:    ori $9, $zero, 0
-; MIPS32_PIC-NEXT:    subu $9, $4, $9
-; MIPS32_PIC-NEXT:    sltu $2, $2, $9
-; MIPS32_PIC-NEXT:    andi $2, $2, 1
-; MIPS32_PIC-NEXT:    sw $1, 36($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $4, 32($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $3, 28($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $5, 24($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $6, 20($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $7, 16($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $8, 12($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    sw $9, 8($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    bnez $2, $BB0_6
+; MIPS32_PIC-NEXT:    sw $1, 8($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    sw $4, 12($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    ori $1, $zero, 7
+; MIPS32_PIC-NEXT:    ori $2, $zero, 3
+; MIPS32_PIC-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    ori $2, $zero, 2
+; MIPS32_PIC-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    ori $2, $zero, 1
+; MIPS32_PIC-NEXT:    sw $2, 24($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    ori $2, $zero, 0
+; MIPS32_PIC-NEXT:    sw $2, 28($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    addiu $2, $zero, 65535
+; MIPS32_PIC-NEXT:    sw $2, 32($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    ori $2, $zero, 0
+; MIPS32_PIC-NEXT:    subu $2, $4, $2
+; MIPS32_PIC-NEXT:    sw $2, 36($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    sltu $1, $1, $2
+; MIPS32_PIC-NEXT:    andi $1, $1, 1
+; MIPS32_PIC-NEXT:    bnez $1, $BB0_6
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_1: # %entry
-; MIPS32_PIC-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT:    lw $2, %got($JTI0_0)($1)
-; MIPS32_PIC-NEXT:    lw $3, 8($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT:    sll $4, $3, 2
-; MIPS32_PIC-NEXT:    addu $2, $2, $4
-; MIPS32_PIC-NEXT:    lw $2, %lo($JTI0_0)($2)
-; MIPS32_PIC-NEXT:    addu $2, $2, $1
-; MIPS32_PIC-NEXT:    jr $2
+; MIPS32_PIC-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $3, 36($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $1, %got($JTI0_0)($2)
+; MIPS32_PIC-NEXT:    sll $3, $3, 2
+; MIPS32_PIC-NEXT:    addu $1, $1, $3
+; MIPS32_PIC-NEXT:    lw $1, %lo($JTI0_0)($1)
+; MIPS32_PIC-NEXT:    addu $1, $1, $2
+; MIPS32_PIC-NEXT:    jr $1
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_2: # %sw.bb
-; MIPS32_PIC-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_3: # %sw.bb1
-; MIPS32_PIC-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_4: # %sw.bb2
-; MIPS32_PIC-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_5: # %sw.bb3
-; MIPS32_PIC-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_6: # %sw.default
 ; MIPS32_PIC-NEXT:    .insn
 ; MIPS32_PIC-NEXT:  # %bb.7: # %sw.epilog
-; MIPS32_PIC-NEXT:    ori $1, $zero, 8
-; MIPS32_PIC-NEXT:    lw $2, 32($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT:    subu $1, $2, $1
-; MIPS32_PIC-NEXT:    lw $3, 28($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT:    sltu $4, $3, $1
-; MIPS32_PIC-NEXT:    andi $4, $4, 1
-; MIPS32_PIC-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
-; MIPS32_PIC-NEXT:    bnez $4, $BB0_13
+; MIPS32_PIC-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    ori $3, $zero, 8
+; MIPS32_PIC-NEXT:    subu $2, $2, $3
+; MIPS32_PIC-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32_PIC-NEXT:    sltu $1, $1, $2
+; MIPS32_PIC-NEXT:    andi $1, $1, 1
+; MIPS32_PIC-NEXT:    bnez $1, $BB0_13
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_8: # %sw.epilog
-; MIPS32_PIC-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT:    lw $2, %got($JTI0_1)($1)
+; MIPS32_PIC-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    lw $3, 4($sp) # 4-byte Folded Reload
-; MIPS32_PIC-NEXT:    sll $4, $3, 2
-; MIPS32_PIC-NEXT:    addu $2, $2, $4
-; MIPS32_PIC-NEXT:    lw $2, %lo($JTI0_1)($2)
-; MIPS32_PIC-NEXT:    addu $2, $2, $1
-; MIPS32_PIC-NEXT:    jr $2
+; MIPS32_PIC-NEXT:    lw $1, %got($JTI0_1)($2)
+; MIPS32_PIC-NEXT:    sll $3, $3, 2
+; MIPS32_PIC-NEXT:    addu $1, $1, $3
+; MIPS32_PIC-NEXT:    lw $1, %lo($JTI0_1)($1)
+; MIPS32_PIC-NEXT:    addu $1, $1, $2
+; MIPS32_PIC-NEXT:    jr $1
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_9: # %sw.bb4
-; MIPS32_PIC-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_10: # %sw.bb5
-; MIPS32_PIC-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_11: # %sw.bb6
-; MIPS32_PIC-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_12: # %sw.bb7
-; MIPS32_PIC-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
 ; MIPS32_PIC-NEXT:  $BB0_13: # %sw.default8
-; MIPS32_PIC-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
+; MIPS32_PIC-NEXT:    lw $2, 32($sp) # 4-byte Folded Reload
 ; MIPS32_PIC-NEXT:    addiu $sp, $sp, 40
 ; MIPS32_PIC-NEXT:    jr $ra
 ; MIPS32_PIC-NEXT:    nop
-; MIPS32_PIC:       $JTI0_0:
-; MIPS32_PIC-NEXT:   .gpword ($BB0_2)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_3)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_4)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_5)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_2)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_3)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_4)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_5)
-; MIPS32_PIC-NEXT:   $JTI0_1:
-; MIPS32_PIC-NEXT:   .gpword ($BB0_9)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_10)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_11)
-; MIPS32_PIC-NEXT:   .gpword ($BB0_12)
+
 
 entry:
   switch i32 %a, label %sw.default [

diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
index 318407d619f5..90043c0e9a12 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll
@@ -15,11 +15,11 @@ define float @load_float_align1() {
 ; MIPS32-LABEL: load_float_align1:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(float_align1)
-; MIPS32-NEXT:    addiu $1, $1, %lo(float_align1)
-; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 3($1)
-; MIPS32-NEXT:    lwr $2, 0($1)
-; MIPS32-NEXT:    mtc1 $2, $f0
+; MIPS32-NEXT:    addiu $2, $1, %lo(float_align1)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 3($2)
+; MIPS32-NEXT:    lwr $1, 0($2)
+; MIPS32-NEXT:    mtc1 $1, $f0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
@@ -38,11 +38,11 @@ define float @load_float_align2() {
 ; MIPS32-LABEL: load_float_align2:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(float_align2)
-; MIPS32-NEXT:    addiu $1, $1, %lo(float_align2)
-; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 3($1)
-; MIPS32-NEXT:    lwr $2, 0($1)
-; MIPS32-NEXT:    mtc1 $2, $f0
+; MIPS32-NEXT:    addiu $2, $1, %lo(float_align2)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 3($2)
+; MIPS32-NEXT:    lwr $1, 0($2)
+; MIPS32-NEXT:    mtc1 $1, $f0
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;

diff  --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
index c7a70d56f8a0..a2afbf1c637e 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll
@@ -131,25 +131,23 @@ entry:
 define i64 @load5align1(%struct.MemSize5_Align1* %S) {
 ; MIPS32-LABEL: load5align1:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    # implicit-def: $at
-; MIPS32-NEXT:    lwl $1, 3($4)
-; MIPS32-NEXT:    lwr $1, 0($4)
-; MIPS32-NEXT:    lbu $2, 4($4)
+; MIPS32-NEXT:    # implicit-def: $v0
+; MIPS32-NEXT:    lwl $2, 3($4)
+; MIPS32-NEXT:    lwr $2, 0($4)
+; MIPS32-NEXT:    lbu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 255
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 255
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load5align1:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lbu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lbu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 255
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 255
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize5_Align1* %S to i40*
@@ -161,25 +159,23 @@ entry:
 define i64 @load5align2(%struct.MemSize5_Align2* %S) {
 ; MIPS32-LABEL: load5align2:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    # implicit-def: $at
-; MIPS32-NEXT:    lwl $1, 3($4)
-; MIPS32-NEXT:    lwr $1, 0($4)
-; MIPS32-NEXT:    lbu $2, 4($4)
+; MIPS32-NEXT:    # implicit-def: $v0
+; MIPS32-NEXT:    lwl $2, 3($4)
+; MIPS32-NEXT:    lwr $2, 0($4)
+; MIPS32-NEXT:    lbu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 255
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 255
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load5align2:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lbu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lbu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 255
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 255
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize5_Align2* %S to i40*
@@ -191,23 +187,21 @@ entry:
 define i64 @load5align4(%struct.MemSize5_Align4* %S) {
 ; MIPS32-LABEL: load5align4:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $1, 0($4)
-; MIPS32-NEXT:    lbu $2, 4($4)
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lbu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 255
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 255
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load5align4:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lbu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lbu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 255
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 255
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize5_Align4* %S to i40*
@@ -219,23 +213,21 @@ entry:
 define i64 @load5align8(%struct.MemSize5_Align8* %S) {
 ; MIPS32-LABEL: load5align8:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $1, 0($4)
-; MIPS32-NEXT:    lbu $2, 4($4)
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lbu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 255
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 255
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load5align8:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lbu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lbu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 255
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 255
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize5_Align8* %S to i40*
@@ -247,27 +239,25 @@ entry:
 define i64 @load6align1(%struct.MemSize6_Align1* %S) {
 ; MIPS32-LABEL: load6align1:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    # implicit-def: $at
-; MIPS32-NEXT:    lwl $1, 3($4)
-; MIPS32-NEXT:    lwr $1, 0($4)
 ; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 7($4)
-; MIPS32-NEXT:    lwr $2, 4($4)
+; MIPS32-NEXT:    lwl $2, 3($4)
+; MIPS32-NEXT:    lwr $2, 0($4)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 7($4)
+; MIPS32-NEXT:    lwr $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 65535
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 65535
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load6align1:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lhu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lhu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 65535
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 65535
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize6_Align1* %S to i48*
@@ -279,25 +269,23 @@ entry:
 define i64 @load6align2(%struct.MemSize6_Align2* %S) {
 ; MIPS32-LABEL: load6align2:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    # implicit-def: $at
-; MIPS32-NEXT:    lwl $1, 3($4)
-; MIPS32-NEXT:    lwr $1, 0($4)
-; MIPS32-NEXT:    lhu $2, 4($4)
+; MIPS32-NEXT:    # implicit-def: $v0
+; MIPS32-NEXT:    lwl $2, 3($4)
+; MIPS32-NEXT:    lwr $2, 0($4)
+; MIPS32-NEXT:    lhu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 65535
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 65535
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load6align2:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lhu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lhu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 65535
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 65535
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize6_Align2* %S to i48*
@@ -309,23 +297,21 @@ entry:
 define i64 @load6align4(%struct.MemSize6_Align4* %S) {
 ; MIPS32-LABEL: load6align4:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $1, 0($4)
-; MIPS32-NEXT:    lhu $2, 4($4)
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lhu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 65535
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 65535
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load6align4:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lhu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lhu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 65535
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 65535
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize6_Align4* %S to i48*
@@ -337,23 +323,21 @@ entry:
 define i64 @load6align8(%struct.MemSize6_Align8* %S) {
 ; MIPS32-LABEL: load6align8:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $1, 0($4)
-; MIPS32-NEXT:    lhu $2, 4($4)
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lhu $1, 4($4)
 ; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    andi $3, $2, 65535
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    andi $3, $1, 65535
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load6align8:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lhu $2, 4($4)
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lhu $1, 4($4)
 ; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    andi $3, $2, 65535
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    and $2, $2, $3
+; MIPS32R6-NEXT:    andi $3, $1, 65535
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize6_Align8* %S to i48*
@@ -365,31 +349,29 @@ entry:
 define i64 @load7align1(%struct.MemSize7_Align1* %S) {
 ; MIPS32-LABEL: load7align1:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    # implicit-def: $at
-; MIPS32-NEXT:    lwl $1, 3($4)
-; MIPS32-NEXT:    lwr $1, 0($4)
 ; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 7($4)
-; MIPS32-NEXT:    lwr $2, 4($4)
-; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    lui $4, 255
-; MIPS32-NEXT:    ori $4, $4, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    and $3, $2, $4
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    lwl $2, 3($4)
+; MIPS32-NEXT:    lwr $2, 0($4)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 7($4)
+; MIPS32-NEXT:    lwr $1, 4($4)
+; MIPS32-NEXT:    addiu $4, $zero, 65535
+; MIPS32-NEXT:    lui $3, 255
+; MIPS32-NEXT:    ori $3, $3, 65535
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    and $3, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load7align1:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lw $2, 4($4)
-; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    lui $4, 255
-; MIPS32R6-NEXT:    ori $4, $4, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    and $3, $2, $4
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lw $1, 4($4)
+; MIPS32R6-NEXT:    addiu $4, $zero, 65535
+; MIPS32R6-NEXT:    lui $3, 255
+; MIPS32R6-NEXT:    ori $3, $3, 65535
+; MIPS32R6-NEXT:    and $2, $2, $4
+; MIPS32R6-NEXT:    and $3, $1, $3
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize7_Align1* %S to i56*
@@ -401,31 +383,29 @@ entry:
 define i64 @load7align2(%struct.MemSize7_Align2* %S) {
 ; MIPS32-LABEL: load7align2:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    # implicit-def: $at
-; MIPS32-NEXT:    lwl $1, 3($4)
-; MIPS32-NEXT:    lwr $1, 0($4)
 ; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 7($4)
-; MIPS32-NEXT:    lwr $2, 4($4)
-; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    lui $4, 255
-; MIPS32-NEXT:    ori $4, $4, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    and $3, $2, $4
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    lwl $2, 3($4)
+; MIPS32-NEXT:    lwr $2, 0($4)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 7($4)
+; MIPS32-NEXT:    lwr $1, 4($4)
+; MIPS32-NEXT:    addiu $4, $zero, 65535
+; MIPS32-NEXT:    lui $3, 255
+; MIPS32-NEXT:    ori $3, $3, 65535
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    and $3, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load7align2:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lw $2, 4($4)
-; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    lui $4, 255
-; MIPS32R6-NEXT:    ori $4, $4, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    and $3, $2, $4
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lw $1, 4($4)
+; MIPS32R6-NEXT:    addiu $4, $zero, 65535
+; MIPS32R6-NEXT:    lui $3, 255
+; MIPS32R6-NEXT:    ori $3, $3, 65535
+; MIPS32R6-NEXT:    and $2, $2, $4
+; MIPS32R6-NEXT:    and $3, $1, $3
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize7_Align2* %S to i56*
@@ -437,27 +417,25 @@ entry:
 define i64 @load7align4(%struct.MemSize7_Align4* %S) {
 ; MIPS32-LABEL: load7align4:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $1, 0($4)
-; MIPS32-NEXT:    lw $2, 4($4)
-; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    lui $4, 255
-; MIPS32-NEXT:    ori $4, $4, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    and $3, $2, $4
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lw $1, 4($4)
+; MIPS32-NEXT:    addiu $4, $zero, 65535
+; MIPS32-NEXT:    lui $3, 255
+; MIPS32-NEXT:    ori $3, $3, 65535
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    and $3, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load7align4:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lw $2, 4($4)
-; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    lui $4, 255
-; MIPS32R6-NEXT:    ori $4, $4, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    and $3, $2, $4
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lw $1, 4($4)
+; MIPS32R6-NEXT:    addiu $4, $zero, 65535
+; MIPS32R6-NEXT:    lui $3, 255
+; MIPS32R6-NEXT:    ori $3, $3, 65535
+; MIPS32R6-NEXT:    and $2, $2, $4
+; MIPS32R6-NEXT:    and $3, $1, $3
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize7_Align4* %S to i56*
@@ -469,27 +447,25 @@ entry:
 define i64 @load7align8(%struct.MemSize7_Align8* %S) {
 ; MIPS32-LABEL: load7align8:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $1, 0($4)
-; MIPS32-NEXT:    lw $2, 4($4)
-; MIPS32-NEXT:    addiu $3, $zero, 65535
-; MIPS32-NEXT:    lui $4, 255
-; MIPS32-NEXT:    ori $4, $4, 65535
-; MIPS32-NEXT:    and $1, $1, $3
-; MIPS32-NEXT:    and $3, $2, $4
-; MIPS32-NEXT:    move $2, $1
+; MIPS32-NEXT:    lw $2, 0($4)
+; MIPS32-NEXT:    lw $1, 4($4)
+; MIPS32-NEXT:    addiu $4, $zero, 65535
+; MIPS32-NEXT:    lui $3, 255
+; MIPS32-NEXT:    ori $3, $3, 65535
+; MIPS32-NEXT:    and $2, $2, $4
+; MIPS32-NEXT:    and $3, $1, $3
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
 ; MIPS32R6-LABEL: load7align8:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $1, 0($4)
-; MIPS32R6-NEXT:    lw $2, 4($4)
-; MIPS32R6-NEXT:    addiu $3, $zero, 65535
-; MIPS32R6-NEXT:    lui $4, 255
-; MIPS32R6-NEXT:    ori $4, $4, 65535
-; MIPS32R6-NEXT:    and $1, $1, $3
-; MIPS32R6-NEXT:    and $3, $2, $4
-; MIPS32R6-NEXT:    move $2, $1
+; MIPS32R6-NEXT:    lw $2, 0($4)
+; MIPS32R6-NEXT:    lw $1, 4($4)
+; MIPS32R6-NEXT:    addiu $4, $zero, 65535
+; MIPS32R6-NEXT:    lui $3, 255
+; MIPS32R6-NEXT:    ori $3, $3, 65535
+; MIPS32R6-NEXT:    and $2, $2, $4
+; MIPS32R6-NEXT:    and $3, $1, $3
 ; MIPS32R6-NEXT:    jrc $ra
 entry:
   %0 = bitcast %struct.MemSize7_Align8* %S to i56*
@@ -502,15 +478,15 @@ define double @load_double_align1() {
 ; MIPS32-LABEL: load_double_align1:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(double_align1)
-; MIPS32-NEXT:    addiu $1, $1, %lo(double_align1)
+; MIPS32-NEXT:    addiu $3, $1, %lo(double_align1)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 3($3)
+; MIPS32-NEXT:    lwr $1, 0($3)
 ; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 3($1)
-; MIPS32-NEXT:    lwr $2, 0($1)
-; MIPS32-NEXT:    # implicit-def: $v1
-; MIPS32-NEXT:    lwl $3, 7($1)
-; MIPS32-NEXT:    lwr $3, 4($1)
-; MIPS32-NEXT:    mtc1 $2, $f0
-; MIPS32-NEXT:    mtc1 $3, $f1
+; MIPS32-NEXT:    lwl $2, 7($3)
+; MIPS32-NEXT:    lwr $2, 4($3)
+; MIPS32-NEXT:    mtc1 $1, $f0
+; MIPS32-NEXT:    mtc1 $2, $f1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
@@ -529,15 +505,15 @@ define double @load_double_align2() {
 ; MIPS32-LABEL: load_double_align2:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(double_align2)
-; MIPS32-NEXT:    addiu $1, $1, %lo(double_align2)
+; MIPS32-NEXT:    addiu $3, $1, %lo(double_align2)
+; MIPS32-NEXT:    # implicit-def: $at
+; MIPS32-NEXT:    lwl $1, 3($3)
+; MIPS32-NEXT:    lwr $1, 0($3)
 ; MIPS32-NEXT:    # implicit-def: $v0
-; MIPS32-NEXT:    lwl $2, 3($1)
-; MIPS32-NEXT:    lwr $2, 0($1)
-; MIPS32-NEXT:    # implicit-def: $v1
-; MIPS32-NEXT:    lwl $3, 7($1)
-; MIPS32-NEXT:    lwr $3, 4($1)
-; MIPS32-NEXT:    mtc1 $2, $f0
-; MIPS32-NEXT:    mtc1 $3, $f1
+; MIPS32-NEXT:    lwl $2, 7($3)
+; MIPS32-NEXT:    lwr $2, 4($3)
+; MIPS32-NEXT:    mtc1 $1, $f0
+; MIPS32-NEXT:    mtc1 $2, $f1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;
@@ -556,11 +532,11 @@ define double @load_double_align4() {
 ; MIPS32-LABEL: load_double_align4:
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    lui $1, %hi(double_align4)
-; MIPS32-NEXT:    addiu $1, $1, %lo(double_align4)
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    lw $1, 4($1)
-; MIPS32-NEXT:    mtc1 $2, $f0
-; MIPS32-NEXT:    mtc1 $1, $f1
+; MIPS32-NEXT:    addiu $2, $1, %lo(double_align4)
+; MIPS32-NEXT:    lw $1, 0($2)
+; MIPS32-NEXT:    lw $2, 4($2)
+; MIPS32-NEXT:    mtc1 $1, $f0
+; MIPS32-NEXT:    mtc1 $2, $f1
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ;

diff  --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
index 2dcc174860c1..ce46bed175d6 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll
@@ -6,126 +6,124 @@ define void @long_chain_ambiguous_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32*
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -48
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32-NEXT:    sw $4, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $6, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $7, 32($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    addiu $1, $sp, 64
 ; MIPS32-NEXT:    lw $1, 0($1)
-; MIPS32-NEXT:    addiu $2, $sp, 68
-; MIPS32-NEXT:    lw $2, 0($2)
-; MIPS32-NEXT:    addiu $3, $sp, 72
-; MIPS32-NEXT:    lw $3, 0($3)
-; MIPS32-NEXT:    andi $8, $4, 1
+; MIPS32-NEXT:    sw $1, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 68
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 72
+; MIPS32-NEXT:    lw $1, 0($1)
 ; MIPS32-NEXT:    sw $1, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $6, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $7, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $2, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $3, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $8, $BB0_12
+; MIPS32-NEXT:    andi $1, $4, 1
+; MIPS32-NEXT:    bnez $1, $BB0_12
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.1: # %entry
 ; MIPS32-NEXT:    j $BB0_2
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_2: # %pre.PHI.1
-; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB0_7
+; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB0_7
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.3: # %pre.PHI.1
 ; MIPS32-NEXT:    j $BB0_4
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_4: # %pre.PHI.1.0
-; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB0_8
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB0_8
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.5: # %pre.PHI.1.0
 ; MIPS32-NEXT:    j $BB0_6
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_6: # %b.PHI.1.0
-; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB0_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_7: # %b.PHI.1.1
-; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB0_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_8: # %b.PHI.1.2
-; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB0_9: # %b.PHI.1
-; MIPS32-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $3, $2, 1
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    sw $1, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $3, $BB0_11
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB0_11
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.10: # %b.PHI.1
 ; MIPS32-NEXT:    j $BB0_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_11: # %b.PHI.1.end
-; MIPS32-NEXT:    lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 48
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_12: # %pre.PHI.2
-; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB0_14
+; MIPS32-NEXT:    lw $1, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB0_14
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.13: # %pre.PHI.2
 ; MIPS32-NEXT:    j $BB0_15
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_14: # %b.PHI.2.0
-; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB0_16
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_15: # %b.PHI.2.1
-; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB0_16: # %b.PHI.2
-; MIPS32-NEXT:    lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $3, $2, 1
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    sw $1, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $3, $BB0_19
+; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB0_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.17: # %b.PHI.2
 ; MIPS32-NEXT:    j $BB0_18
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_18: # %b.PHI.2.end
 ; MIPS32-NEXT:    lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 48
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB0_19: # %b.PHI.3
-; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $3, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $4, $3, 1
-; MIPS32-NEXT:    movn $1, $2, $4
-; MIPS32-NEXT:    lw $4, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $5, $4, 1
-; MIPS32-NEXT:    move $6, $2
-; MIPS32-NEXT:    movn $6, $1, $5
-; MIPS32-NEXT:    lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sw $6, 0($1)
-; MIPS32-NEXT:    sw $2, 0($1)
+; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $3, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $5, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    move $4, $1
+; MIPS32-NEXT:    andi $5, $5, 1
+; MIPS32-NEXT:    movn $4, $1, $5
+; MIPS32-NEXT:    andi $5, $3, 1
+; MIPS32-NEXT:    move $3, $1
+; MIPS32-NEXT:    movn $3, $4, $5
+; MIPS32-NEXT:    sw $3, 0($2)
+; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 48
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
@@ -193,132 +191,130 @@ define void @long_chain_i32_in_gpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, i32* %a, i32* %
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -56
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 56
+; MIPS32-NEXT:    sw $4, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $5, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $6, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $7, 36($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    addiu $1, $sp, 72
 ; MIPS32-NEXT:    lw $1, 0($1)
-; MIPS32-NEXT:    addiu $2, $sp, 76
-; MIPS32-NEXT:    lw $2, 0($2)
-; MIPS32-NEXT:    addiu $3, $sp, 80
-; MIPS32-NEXT:    lw $3, 0($3)
-; MIPS32-NEXT:    ori $8, $zero, 0
-; MIPS32-NEXT:    andi $9, $4, 1
+; MIPS32-NEXT:    sw $1, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 76
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 80
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $1, $zero, 0
 ; MIPS32-NEXT:    sw $1, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $6, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $7, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $2, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $3, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $8, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $9, $BB1_12
+; MIPS32-NEXT:    andi $1, $4, 1
+; MIPS32-NEXT:    bnez $1, $BB1_12
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.1: # %entry
 ; MIPS32-NEXT:    j $BB1_2
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_2: # %pre.PHI.1
-; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB1_7
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB1_7
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.3: # %pre.PHI.1
 ; MIPS32-NEXT:    j $BB1_4
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_4: # %pre.PHI.1.0
-; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB1_8
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB1_8
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.5: # %pre.PHI.1.0
 ; MIPS32-NEXT:    j $BB1_6
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_6: # %b.PHI.1.0
 ; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 20($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB1_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_7: # %b.PHI.1.1
-; MIPS32-NEXT:    lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 20($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB1_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_8: # %b.PHI.1.2
-; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 20($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB1_9: # %b.PHI.1
-; MIPS32-NEXT:    lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $3, $2, 1
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    lw $5, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $3, $BB1_11
+; MIPS32-NEXT:    lw $2, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $3, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $3, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sw $3, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB1_11
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.10: # %b.PHI.1
 ; MIPS32-NEXT:    j $BB1_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_11: # %b.PHI.1.end
-; MIPS32-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 48($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 56
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_12: # %pre.PHI.2
-; MIPS32-NEXT:    lw $1, 48($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB1_14
+; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB1_14
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.13: # %pre.PHI.2
 ; MIPS32-NEXT:    j $BB1_15
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_14: # %b.PHI.2.0
 ; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB1_16
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_15: # %b.PHI.2.1
-; MIPS32-NEXT:    lw $1, 52($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB1_16: # %b.PHI.2
-; MIPS32-NEXT:    lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $3, $2, 1
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    move $5, $1
-; MIPS32-NEXT:    sw $1, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $3, $BB1_19
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    move $3, $2
+; MIPS32-NEXT:    sw $3, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB1_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.17: # %b.PHI.2
 ; MIPS32-NEXT:    j $BB1_18
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_18: # %b.PHI.2.end
 ; MIPS32-NEXT:    lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 48($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 56
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB1_19: # %b.PHI.3
-; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $3, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $4, $3, 1
-; MIPS32-NEXT:    movn $1, $2, $4
-; MIPS32-NEXT:    lw $4, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $5, $4, 1
-; MIPS32-NEXT:    move $6, $2
-; MIPS32-NEXT:    movn $6, $1, $5
-; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sw $6, 0($1)
-; MIPS32-NEXT:    sw $2, 0($1)
+; MIPS32-NEXT:    lw $2, 48($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $3, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $5, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $4, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $5, $5, 1
+; MIPS32-NEXT:    movn $4, $1, $5
+; MIPS32-NEXT:    andi $5, $3, 1
+; MIPS32-NEXT:    move $3, $1
+; MIPS32-NEXT:    movn $3, $4, $5
+; MIPS32-NEXT:    sw $3, 0($2)
+; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 56
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
@@ -385,126 +381,124 @@ define void @long_chain_ambiguous_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, flo
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -48
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 48
+; MIPS32-NEXT:    sw $4, 20($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $5, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $6, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $7, 32($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    addiu $1, $sp, 64
 ; MIPS32-NEXT:    lw $1, 0($1)
-; MIPS32-NEXT:    addiu $2, $sp, 68
-; MIPS32-NEXT:    lw $2, 0($2)
-; MIPS32-NEXT:    addiu $3, $sp, 72
-; MIPS32-NEXT:    lw $3, 0($3)
-; MIPS32-NEXT:    andi $8, $4, 1
+; MIPS32-NEXT:    sw $1, 36($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 68
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 72
+; MIPS32-NEXT:    lw $1, 0($1)
 ; MIPS32-NEXT:    sw $1, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $6, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $7, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $2, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $3, 20($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $8, $BB2_12
+; MIPS32-NEXT:    andi $1, $4, 1
+; MIPS32-NEXT:    bnez $1, $BB2_12
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.1: # %entry
 ; MIPS32-NEXT:    j $BB2_2
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_2: # %pre.PHI.1
-; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB2_7
+; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB2_7
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.3: # %pre.PHI.1
 ; MIPS32-NEXT:    j $BB2_4
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_4: # %pre.PHI.1.0
-; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB2_8
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB2_8
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.5: # %pre.PHI.1.0
 ; MIPS32-NEXT:    j $BB2_6
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_6: # %b.PHI.1.0
-; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB2_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_7: # %b.PHI.1.1
-; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB2_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_8: # %b.PHI.1.2
-; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 16($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB2_9: # %b.PHI.1
-; MIPS32-NEXT:    lw $1, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $3, $2, 1
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    sw $1, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $3, $BB2_11
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB2_11
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.10: # %b.PHI.1
 ; MIPS32-NEXT:    j $BB2_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_11: # %b.PHI.1.end
-; MIPS32-NEXT:    lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 48
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_12: # %pre.PHI.2
-; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB2_14
+; MIPS32-NEXT:    lw $1, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB2_14
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.13: # %pre.PHI.2
 ; MIPS32-NEXT:    j $BB2_15
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_14: # %b.PHI.2.0
-; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB2_16
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_15: # %b.PHI.2.1
-; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 0($1)
-; MIPS32-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lw $1, 36($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 4($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB2_16: # %b.PHI.2
-; MIPS32-NEXT:    lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $3, $2, 1
-; MIPS32-NEXT:    move $4, $1
-; MIPS32-NEXT:    sw $1, 0($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $3, $BB2_19
+; MIPS32-NEXT:    lw $1, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 4($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    sw $2, 0($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB2_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.17: # %b.PHI.2
 ; MIPS32-NEXT:    j $BB2_18
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_18: # %b.PHI.2.end
 ; MIPS32-NEXT:    lw $1, 0($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 48
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB2_19: # %b.PHI.3
-; MIPS32-NEXT:    lw $1, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $3, 32($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $4, $3, 1
-; MIPS32-NEXT:    movn $1, $2, $4
-; MIPS32-NEXT:    lw $4, 36($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $5, $4, 1
-; MIPS32-NEXT:    move $6, $2
-; MIPS32-NEXT:    movn $6, $1, $5
-; MIPS32-NEXT:    lw $1, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    sw $6, 0($1)
-; MIPS32-NEXT:    sw $2, 0($1)
+; MIPS32-NEXT:    lw $2, 44($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $3, 24($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $5, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 12($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    move $4, $1
+; MIPS32-NEXT:    andi $5, $5, 1
+; MIPS32-NEXT:    movn $4, $1, $5
+; MIPS32-NEXT:    andi $5, $3, 1
+; MIPS32-NEXT:    move $3, $1
+; MIPS32-NEXT:    movn $3, $4, $5
+; MIPS32-NEXT:    sw $3, 0($2)
+; MIPS32-NEXT:    sw $1, 0($2)
 ; MIPS32-NEXT:    addiu $sp, $sp, 48
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    nop
@@ -572,40 +566,40 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
 ; MIPS32:       # %bb.0: # %entry
 ; MIPS32-NEXT:    addiu $sp, $sp, -56
 ; MIPS32-NEXT:    .cfi_def_cfa_offset 56
+; MIPS32-NEXT:    sw $4, 24($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $5, 28($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $6, 32($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    sw $7, 36($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    addiu $1, $sp, 72
 ; MIPS32-NEXT:    lw $1, 0($1)
-; MIPS32-NEXT:    addiu $2, $sp, 76
-; MIPS32-NEXT:    lw $2, 0($2)
-; MIPS32-NEXT:    addiu $3, $sp, 80
-; MIPS32-NEXT:    lw $3, 0($3)
-; MIPS32-NEXT:    ori $8, $zero, 0
-; MIPS32-NEXT:    mtc1 $8, $f0
-; MIPS32-NEXT:    andi $8, $4, 1
-; MIPS32-NEXT:    sw $1, 52($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $4, 48($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $5, 44($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $6, 40($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $7, 36($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $2, 32($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    sw $3, 28($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    swc1 $f0, 24($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $8, $BB3_12
+; MIPS32-NEXT:    sw $1, 40($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 76
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 44($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    addiu $1, $sp, 80
+; MIPS32-NEXT:    lw $1, 0($1)
+; MIPS32-NEXT:    sw $1, 48($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    ori $1, $zero, 0
+; MIPS32-NEXT:    mtc1 $1, $f0
+; MIPS32-NEXT:    swc1 $f0, 52($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $4, 1
+; MIPS32-NEXT:    bnez $1, $BB3_12
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.1: # %entry
 ; MIPS32-NEXT:    j $BB3_2
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB3_2: # %pre.PHI.1
-; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB3_7
+; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB3_7
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.3: # %pre.PHI.1
 ; MIPS32-NEXT:    j $BB3_4
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB3_4: # %pre.PHI.1.0
-; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    bnez $2, $BB3_8
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    bnez $1, $BB3_8
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.5: # %pre.PHI.1.0
 ; MIPS32-NEXT:    j $BB3_6
@@ -617,40 +611,39 @@ define void @long_chain_float_in_fpr(i1 %cnd0, i1 %cnd1, i1 %cnd2, float* %a, fl
 ; MIPS32-NEXT:    j $BB3_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB3_7: # %b.PHI.1.1
-; MIPS32-NEXT:    lw $1, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    lwc1 $f0, 0($1)
 ; MIPS32-NEXT:    swc1 $f0, 20($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:    j $BB3_9
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB3_8: # %b.PHI.1.2
-; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 44($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    lwc1 $f0, 0($1)
 ; MIPS32-NEXT:    swc1 $f0, 20($sp) # 4-byte Folded Spill
 ; MIPS32-NEXT:  $BB3_9: # %b.PHI.1
-; MIPS32-NEXT:    lwc1 $f0, 20($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $1, 40($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    andi $2, $1, 1
-; MIPS32-NEXT:    mov.s $f1, $f0
-; MIPS32-NEXT:    lwc1 $f2, 24($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    swc1 $f0, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    lwc1 $f0, 52($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 32($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lwc1 $f1, 20($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    swc1 $f1, 8($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    andi $1, $1, 1
 ; MIPS32-NEXT:    swc1 $f1, 12($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    swc1 $f2, 8($sp) # 4-byte Folded Spill
-; MIPS32-NEXT:    bnez $2, $BB3_11
+; MIPS32-NEXT:    swc1 $f0, 16($sp) # 4-byte Folded Spill
+; MIPS32-NEXT:    bnez $1, $BB3_11
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  # %bb.10: # %b.PHI.1
 ; MIPS32-NEXT:    j $BB3_19
 ; MIPS32-NEXT:    nop
 ; MIPS32-NEXT:  $BB3_11: # %b.PHI.1.end
-; MIPS32-NEXT:    lwc1 $f0, 16($sp) # 4-byte Folded Reload
-; MIPS32-NEXT:    lw $1, 28($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lwc1 $f0, 8($sp) # 4-byte Folded Reload
+; MIPS32-NEXT:    lw $1, 48($sp) # 4-byte Folded Reload
 ; MIPS32-NEXT:    swc1 $f0, 0($1)
 ; MIPS32-NEXT:    addi